linux/arch/ia64/kernel/unaligned.c
<<
>>
Prefs
   1/*
   2 * Architecture-specific unaligned trap handling.
   3 *
   4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
   5 *      Stephane Eranian <eranian@hpl.hp.com>
   6 *      David Mosberger-Tang <davidm@hpl.hp.com>
   7 *
   8 * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
   9 *              get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  10 *              stacked register returns an undefined value; it does NOT trigger a
  11 *              "rsvd register fault").
  12 * 2001/10/11   Fix unaligned access to rotating registers in s/w pipelined loops.
  13 * 2001/08/13   Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  14 * 2001/01/17   Add support emulation of unaligned kernel accesses.
  15 */
  16#include <linux/kernel.h>
  17#include <linux/sched.h>
  18#include <linux/tty.h>
  19
  20#include <asm/intrinsics.h>
  21#include <asm/processor.h>
  22#include <asm/rse.h>
  23#include <asm/uaccess.h>
  24#include <asm/unaligned.h>
  25
  26extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
  27
  28#undef DEBUG_UNALIGNED_TRAP
  29
  30#ifdef DEBUG_UNALIGNED_TRAP
  31# define DPRINT(a...)   do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
  32# define DDUMP(str,vp,len)      dump(str, vp, len)
  33
  34static void
  35dump (const char *str, void *vp, size_t len)
  36{
  37        unsigned char *cp = vp;
  38        int i;
  39
  40        printk("%s", str);
  41        for (i = 0; i < len; ++i)
  42                printk (" %02x", *cp++);
  43        printk("\n");
  44}
  45#else
  46# define DPRINT(a...)
  47# define DDUMP(str,vp,len)
  48#endif
  49
  50#define IA64_FIRST_STACKED_GR   32
  51#define IA64_FIRST_ROTATING_FR  32
  52#define SIGN_EXT9               0xffffffffffffff00ul
  53
  54/*
  55 *  sysctl settable hook which tells the kernel whether to honor the
  56 *  IA64_THREAD_UAC_NOPRINT prctl.  Because this is user settable, we want
  57 *  to allow the super user to enable/disable this for security reasons
  58 *  (i.e. don't allow attacker to fill up logs with unaligned accesses).
  59 */
  60int no_unaligned_warning;
  61static int noprint_warning;
  62
  63/*
  64 * For M-unit:
  65 *
  66 *  opcode |   m  |   x6    |
  67 * --------|------|---------|
  68 * [40-37] | [36] | [35:30] |
  69 * --------|------|---------|
  70 *     4   |   1  |    6    | = 11 bits
  71 * --------------------------
  72 * However bits [31:30] are not directly useful to distinguish between
  73 * load/store so we can use [35:32] instead, which gives the following
  74 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
  75 * checking the m-bit until later in the load/store emulation.
  76 */
  77#define IA64_OPCODE_MASK        0x1ef
  78#define IA64_OPCODE_SHIFT       32
  79
/*
 * Table C-28 Integer Load/Store
 *
 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 *
 * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so we must fail.
 */
  88#define LD_OP            0x080
  89#define LDS_OP           0x081
  90#define LDA_OP           0x082
  91#define LDSA_OP          0x083
  92#define LDBIAS_OP        0x084
  93#define LDACQ_OP         0x085
  94/* 0x086, 0x087 are not relevant */
  95#define LDCCLR_OP        0x088
  96#define LDCNC_OP         0x089
  97#define LDCCLRACQ_OP     0x08a
  98#define ST_OP            0x08c
  99#define STREL_OP         0x08d
 100/* 0x08e,0x8f are not relevant */
 101
 102/*
 103 * Table C-29 Integer Load +Reg
 104 *
 105 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 106 * a load/store of this form.
 107 */
 108
/*
 * Table C-30 Integer Load/Store +Imm
 *
 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 *
 * ld8.fill, st8.fill  must be aligned because the NaT bits are based on
 * the address, so we must fail and the program must be fixed.
 */
 117#define LD_IMM_OP            0x0a0
 118#define LDS_IMM_OP           0x0a1
 119#define LDA_IMM_OP           0x0a2
 120#define LDSA_IMM_OP          0x0a3
 121#define LDBIAS_IMM_OP        0x0a4
 122#define LDACQ_IMM_OP         0x0a5
 123/* 0x0a6, 0xa7 are not relevant */
 124#define LDCCLR_IMM_OP        0x0a8
 125#define LDCNC_IMM_OP         0x0a9
 126#define LDCCLRACQ_IMM_OP     0x0aa
 127#define ST_IMM_OP            0x0ac
 128#define STREL_IMM_OP         0x0ad
 129/* 0x0ae,0xaf are not relevant */
 130
 131/*
 132 * Table C-32 Floating-point Load/Store
 133 */
 134#define LDF_OP           0x0c0
 135#define LDFS_OP          0x0c1
 136#define LDFA_OP          0x0c2
 137#define LDFSA_OP         0x0c3
 138/* 0x0c6 is irrelevant */
 139#define LDFCCLR_OP       0x0c8
 140#define LDFCNC_OP        0x0c9
 141/* 0x0cb is irrelevant  */
 142#define STF_OP           0x0cc
 143
 144/*
 145 * Table C-33 Floating-point Load +Reg
 146 *
 147 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 148 * a load/store of this form.
 149 */
 150
 151/*
 152 * Table C-34 Floating-point Load/Store +Imm
 153 */
 154#define LDF_IMM_OP       0x0e0
 155#define LDFS_IMM_OP      0x0e1
 156#define LDFA_IMM_OP      0x0e2
 157#define LDFSA_IMM_OP     0x0e3
 158/* 0x0e6 is irrelevant */
 159#define LDFCCLR_IMM_OP   0x0e8
 160#define LDFCNC_IMM_OP    0x0e9
 161#define STF_IMM_OP       0x0ec
 162
/*
 * Bit-field view of a 41-bit load/store instruction slot, as decoded by the
 * emulation code in this file.  Bit positions are given per field; the
 * notes describe how the visible emulation code uses each field.
 */
typedef struct {
        unsigned long    qp:6;  /* [0:5]   */
        unsigned long    r1:7;  /* [6:12]  target register of a load */
        unsigned long   imm:7;  /* [13:19] low 7 bits of imm9, or the r2 register number for the +Reg form */
        unsigned long    r3:7;  /* [20:26] base register, updated by the post-increment forms */
        unsigned long     x:1;  /* [27:27] bit 7 of imm9 in the +Imm form */
        unsigned long  hint:2;  /* [28:29] */
        unsigned long x6_sz:2;  /* [30:31] access size is (1 << x6_sz) bytes */
        unsigned long x6_op:4;  /* [32:35], x6 = x6_sz|x6_op */
        unsigned long     m:1;  /* [36:36] sign bit of imm9 (+Imm form) / selects the +Reg update form */
        unsigned long    op:4;  /* [37:40] major opcode; 0x5 selects the +Imm update form */
        unsigned long   pad:23; /* [41:63] */
} load_store_t;
 176
 177
/*
 * Kind of base-register update requested from emulate_load_updates():
 * add a 9-bit immediate to r3, or add the contents of register r2 to r3.
 */
typedef enum {
        UPD_IMMEDIATE,  /* ldXZ r1=[r3],imm(9) */
        UPD_REG         /* ldXZ r1=[r3],r2     */
} update_t;
 182
 183/*
 184 * We use tables to keep track of the offsets of registers in the saved state.
 185 * This way we save having big switch/case statements.
 186 *
 187 * We use bit 0 to indicate switch_stack or pt_regs.
 188 * The offset is simply shifted by 1 bit.
 189 * A 2-byte value should be enough to hold any kind of offset
 190 *
 191 * In case the calling convention changes (and thus pt_regs/switch_stack)
 192 * simply use RSW instead of RPT or vice-versa.
 193 */
 194
/* Byte offset of member x inside struct pt_regs / struct switch_stack. */
#define RPO(x)  ((size_t) &((struct pt_regs *)0)->x)
#define RSO(x)  ((size_t) &((struct switch_stack *)0)->x)

/*
 * Table encodings: the offset is shifted left by one and bit 0 tells which
 * structure holds the register (0 = pt_regs, 1 = switch_stack).
 */
#define RPT(x)          (RPO(x) << 1)
#define RSW(x)          (1| RSO(x)<<1)

/* Decode a gr_info[] entry: byte offset, and "lives in switch_stack" flag. */
#define GR_OFFS(x)      (gr_info[x]>>1)
#define GR_IN_SW(x)     (gr_info[x] & 0x1)

/* Decode an fr_info[] entry: byte offset, and "lives in switch_stack" flag. */
#define FR_OFFS(x)      (fr_info[x]>>1)
#define FR_IN_SW(x)     (fr_info[x] & 0x1)
 206
/*
 * Save location of the static general registers r0-r31, indexed by register
 * number.  Each entry is built with RPT() (register saved in pt_regs) or
 * RSW() (register saved in switch_stack) and is decoded with GR_OFFS() and
 * GR_IN_SW().  Only r4-r7 are located in switch_stack.
 */
static u16 gr_info[32]={
        0,                      /* r0 is read-only : WE SHOULD NEVER GET THIS */

        RPT(r1), RPT(r2), RPT(r3),

        RSW(r4), RSW(r5), RSW(r6), RSW(r7),

        RPT(r8), RPT(r9), RPT(r10), RPT(r11),
        RPT(r12), RPT(r13), RPT(r14), RPT(r15),

        RPT(r16), RPT(r17), RPT(r18), RPT(r19),
        RPT(r20), RPT(r21), RPT(r22), RPT(r23),
        RPT(r24), RPT(r25), RPT(r26), RPT(r27),
        RPT(r28), RPT(r29), RPT(r30), RPT(r31)
};
 222
/*
 * Save location of the floating-point registers f0-f31, indexed by register
 * number.  Entries use the same RPT()/RSW() encoding as gr_info[] and are
 * decoded with FR_OFFS() and FR_IN_SW().  f6-f11 are located in pt_regs,
 * f2-f5 and f12-f31 in switch_stack; f0 and f1 have no save slot.
 */
static u16 fr_info[32]={
        0,                      /* constant : WE SHOULD NEVER GET THIS */
        0,                      /* constant : WE SHOULD NEVER GET THIS */

        RSW(f2), RSW(f3), RSW(f4), RSW(f5),

        RPT(f6), RPT(f7), RPT(f8), RPT(f9),
        RPT(f10), RPT(f11),

        RSW(f12), RSW(f13), RSW(f14),
        RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
        RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
        RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
        RSW(f30), RSW(f31)
};
 238
 239/* Invalidate ALAT entry for integer register REGNO.  */
/*
 * Dispatch a run-time register number to ia64_invala_gr(), which is always
 * invoked here with a literal register number (one case per register,
 * 0-127).  A REGNO outside that range matches no case and does nothing.
 * NOTE(review): presumably the intrinsic requires a compile-time constant,
 * which is why a switch is used instead of a direct call -- confirm against
 * <asm/intrinsics.h>.
 */
static void
invala_gr (int regno)
{
        /* F(n) expands to the switch case that handles register n. */
#       define F(reg)   case reg: ia64_invala_gr(reg); break

        switch (regno) {
                F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
                F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
                F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
                F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
                F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
                F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
                F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
                F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
                F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
                F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
                F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
                F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
                F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
                F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
                F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
                F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
        }
#       undef F
}
 265
 266/* Invalidate ALAT entry for floating-point register REGNO.  */
/*
 * Dispatch a run-time register number to ia64_invala_fr(), which is always
 * invoked here with a literal register number (one case per register,
 * 0-127).  A REGNO outside that range matches no case and does nothing.
 * NOTE(review): presumably the intrinsic requires a compile-time constant,
 * which is why a switch is used instead of a direct call -- confirm against
 * <asm/intrinsics.h>.
 */
static void
invala_fr (int regno)
{
        /* F(n) expands to the switch case that handles register n. */
#       define F(reg)   case reg: ia64_invala_fr(reg); break

        switch (regno) {
                F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
                F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
                F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
                F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
                F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
                F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
                F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
                F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
                F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
                F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
                F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
                F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
                F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
                F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
                F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
                F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
        }
#       undef F
}
 292
 293static inline unsigned long
 294rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
 295{
 296        reg += rrb;
 297        if (reg >= sor)
 298                reg -= sor;
 299        return reg;
 300}
 301
 302static void
 303set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
 304{
 305        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 306        unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 307        unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 308        unsigned long rnats, nat_mask;
 309        unsigned long on_kbs;
 310        long sof = (regs->cr_ifs) & 0x7f;
 311        long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 312        long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 313        long ridx = r1 - 32;
 314
 315        if (ridx >= sof) {
 316                /* this should never happen, as the "rsvd register fault" has higher priority */
 317                DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 318                return;
 319        }
 320
 321        if (ridx < sor)
 322                ridx = rotate_reg(sor, rrb_gr, ridx);
 323
 324        DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 325               r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 326
 327        on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 328        addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 329        if (addr >= kbs) {
 330                /* the register is on the kernel backing store: easy... */
 331                rnat_addr = ia64_rse_rnat_addr(addr);
 332                if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 333                        rnat_addr = &sw->ar_rnat;
 334                nat_mask = 1UL << ia64_rse_slot_num(addr);
 335
 336                *addr = val;
 337                if (nat)
 338                        *rnat_addr |=  nat_mask;
 339                else
 340                        *rnat_addr &= ~nat_mask;
 341                return;
 342        }
 343
 344        if (!user_stack(current, regs)) {
 345                DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
 346                return;
 347        }
 348
 349        bspstore = (unsigned long *)regs->ar_bspstore;
 350        ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 351        bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 352        addr    = ia64_rse_skip_regs(bsp, ridx);
 353
 354        DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 355
 356        ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 357
 358        rnat_addr = ia64_rse_rnat_addr(addr);
 359
 360        ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 361        DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
 362               (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
 363
 364        nat_mask = 1UL << ia64_rse_slot_num(addr);
 365        if (nat)
 366                rnats |=  nat_mask;
 367        else
 368                rnats &= ~nat_mask;
 369        ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
 370
 371        DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 372}
 373
 374
 375static void
 376get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
 377{
 378        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 379        unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 380        unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 381        unsigned long rnats, nat_mask;
 382        unsigned long on_kbs;
 383        long sof = (regs->cr_ifs) & 0x7f;
 384        long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 385        long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 386        long ridx = r1 - 32;
 387
 388        if (ridx >= sof) {
 389                /* read of out-of-frame register returns an undefined value; 0 in our case.  */
 390                DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 391                goto fail;
 392        }
 393
 394        if (ridx < sor)
 395                ridx = rotate_reg(sor, rrb_gr, ridx);
 396
 397        DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 398               r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 399
 400        on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 401        addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 402        if (addr >= kbs) {
 403                /* the register is on the kernel backing store: easy... */
 404                *val = *addr;
 405                if (nat) {
 406                        rnat_addr = ia64_rse_rnat_addr(addr);
 407                        if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 408                                rnat_addr = &sw->ar_rnat;
 409                        nat_mask = 1UL << ia64_rse_slot_num(addr);
 410                        *nat = (*rnat_addr & nat_mask) != 0;
 411                }
 412                return;
 413        }
 414
 415        if (!user_stack(current, regs)) {
 416                DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
 417                goto fail;
 418        }
 419
 420        bspstore = (unsigned long *)regs->ar_bspstore;
 421        ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 422        bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 423        addr    = ia64_rse_skip_regs(bsp, ridx);
 424
 425        DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 426
 427        ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 428
 429        if (nat) {
 430                rnat_addr = ia64_rse_rnat_addr(addr);
 431                nat_mask = 1UL << ia64_rse_slot_num(addr);
 432
 433                DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 434
 435                ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 436                *nat = (rnats & nat_mask) != 0;
 437        }
 438        return;
 439
 440  fail:
 441        *val = 0;
 442        if (nat)
 443                *nat = 0;
 444        return;
 445}
 446
 447
 448static void
 449setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
 450{
 451        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 452        unsigned long addr;
 453        unsigned long bitmask;
 454        unsigned long *unat;
 455
 456        /*
 457         * First takes care of stacked registers
 458         */
 459        if (regnum >= IA64_FIRST_STACKED_GR) {
 460                set_rse_reg(regs, regnum, val, nat);
 461                return;
 462        }
 463
 464        /*
 465         * Using r0 as a target raises a General Exception fault which has higher priority
 466         * than the Unaligned Reference fault.
 467         */
 468
 469        /*
 470         * Now look at registers in [0-31] range and init correct UNAT
 471         */
 472        if (GR_IN_SW(regnum)) {
 473                addr = (unsigned long)sw;
 474                unat = &sw->ar_unat;
 475        } else {
 476                addr = (unsigned long)regs;
 477                unat = &sw->caller_unat;
 478        }
 479        DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
 480               addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
 481        /*
 482         * add offset from base of struct
 483         * and do it !
 484         */
 485        addr += GR_OFFS(regnum);
 486
 487        *(unsigned long *)addr = val;
 488
 489        /*
 490         * We need to clear the corresponding UNAT bit to fully emulate the load
 491         * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
 492         */
 493        bitmask   = 1UL << (addr >> 3 & 0x3f);
 494        DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
 495        if (nat) {
 496                *unat |= bitmask;
 497        } else {
 498                *unat &= ~bitmask;
 499        }
 500        DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 501}
 502
/*
 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 * range from 32-127; the result is in the range from 0-95).
 */
 507static inline unsigned long
 508fph_index (struct pt_regs *regs, long regnum)
 509{
 510        unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
 511        return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 512}
 513
 514static void
 515setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 516{
 517        struct switch_stack *sw = (struct switch_stack *)regs - 1;
 518        unsigned long addr;
 519
 520        /*
 521         * From EAS-2.5: FPDisableFault has higher priority than Unaligned
 522         * Fault. Thus, when we get here, we know the partition is enabled.
 523         * To update f32-f127, there are three choices:
 524         *
 525         *      (1) save f32-f127 to thread.fph and update the values there
 526         *      (2) use a gigantic switch statement to directly access the registers
 527         *      (3) generate code on the fly to update the desired register
 528         *
 529         * For now, we are using approach (1).
 530         */
 531        if (regnum >= IA64_FIRST_ROTATING_FR) {
 532                ia64_sync_fph(current);
 533                current->thread.fph[fph_index(regs, regnum)] = *fpval;
 534        } else {
 535                /*
 536                 * pt_regs or switch_stack ?
 537                 */
 538                if (FR_IN_SW(regnum)) {
 539                        addr = (unsigned long)sw;
 540                } else {
 541                        addr = (unsigned long)regs;
 542                }
 543
 544                DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
 545
 546                addr += FR_OFFS(regnum);
 547                *(struct ia64_fpreg *)addr = *fpval;
 548
 549                /*
 550                 * mark the low partition as being used now
 551                 *
 552                 * It is highly unlikely that this bit is not already set, but
 553                 * let's do it for safety.
 554                 */
 555                regs->cr_ipsr |= IA64_PSR_MFL;
 556        }
 557}
 558
/*
 * These two inline functions generate the spilled form of the constant floating-point
 * registers (f0 and f1), which can then be used with stfX.
 */
/*
 * Write the spilled form of constant register f0 to *FINAL by calling
 * ia64_stf_spill() with register number 0.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
        ia64_stf_spill(final, 0);
}
 568
/*
 * Write the spilled form of constant register f1 to *FINAL by calling
 * ia64_stf_spill() with register number 1.
 */
static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
        ia64_stf_spill(final, 1);
}
 574
 575static void
 576getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 577{
 578        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 579        unsigned long addr;
 580
 581        /*
 582         * From EAS-2.5: FPDisableFault has higher priority than
 583         * Unaligned Fault. Thus, when we get here, we know the partition is
 584         * enabled.
 585         *
 586         * When regnum > 31, the register is still live and we need to force a save
 587         * to current->thread.fph to get access to it.  See discussion in setfpreg()
 588         * for reasons and other ways of doing this.
 589         */
 590        if (regnum >= IA64_FIRST_ROTATING_FR) {
 591                ia64_flush_fph(current);
 592                *fpval = current->thread.fph[fph_index(regs, regnum)];
 593        } else {
 594                /*
 595                 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
 596                 * not saved, we must generate their spilled form on the fly
 597                 */
 598                switch(regnum) {
 599                case 0:
 600                        float_spill_f0(fpval);
 601                        break;
 602                case 1:
 603                        float_spill_f1(fpval);
 604                        break;
 605                default:
 606                        /*
 607                         * pt_regs or switch_stack ?
 608                         */
 609                        addr =  FR_IN_SW(regnum) ? (unsigned long)sw
 610                                                 : (unsigned long)regs;
 611
 612                        DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
 613                               FR_IN_SW(regnum), addr, FR_OFFS(regnum));
 614
 615                        addr  += FR_OFFS(regnum);
 616                        *fpval = *(struct ia64_fpreg *)addr;
 617                }
 618        }
 619}
 620
 621
 622static void
 623getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
 624{
 625        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 626        unsigned long addr, *unat;
 627
 628        if (regnum >= IA64_FIRST_STACKED_GR) {
 629                get_rse_reg(regs, regnum, val, nat);
 630                return;
 631        }
 632
 633        /*
 634         * take care of r0 (read-only always evaluate to 0)
 635         */
 636        if (regnum == 0) {
 637                *val = 0;
 638                if (nat)
 639                        *nat = 0;
 640                return;
 641        }
 642
 643        /*
 644         * Now look at registers in [0-31] range and init correct UNAT
 645         */
 646        if (GR_IN_SW(regnum)) {
 647                addr = (unsigned long)sw;
 648                unat = &sw->ar_unat;
 649        } else {
 650                addr = (unsigned long)regs;
 651                unat = &sw->caller_unat;
 652        }
 653
 654        DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
 655
 656        addr += GR_OFFS(regnum);
 657
 658        *val  = *(unsigned long *)addr;
 659
 660        /*
 661         * do it only when requested
 662         */
 663        if (nat)
 664                *nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
 665}
 666
 667static void
 668emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
 669{
 670        /*
 671         * IMPORTANT:
 672         * Given the way we handle unaligned speculative loads, we should
 673         * not get to this point in the code but we keep this sanity check,
 674         * just in case.
 675         */
 676        if (ld.x6_op == 1 || ld.x6_op == 3) {
 677                printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
 678                die_if_kernel("unaligned reference on speculative load with register update\n",
 679                              regs, 30);
 680        }
 681
 682
 683        /*
 684         * at this point, we know that the base register to update is valid i.e.,
 685         * it's not r0
 686         */
 687        if (type == UPD_IMMEDIATE) {
 688                unsigned long imm;
 689
 690                /*
 691                 * Load +Imm: ldXZ r1=[r3],imm(9)
 692                 *
 693                 *
 694                 * form imm9: [13:19] contain the first 7 bits
 695                 */
 696                imm = ld.x << 7 | ld.imm;
 697
 698                /*
 699                 * sign extend (1+8bits) if m set
 700                 */
 701                if (ld.m) imm |= SIGN_EXT9;
 702
 703                /*
 704                 * ifa == r3 and we know that the NaT bit on r3 was clear so
 705                 * we can directly use ifa.
 706                 */
 707                ifa += imm;
 708
 709                setreg(ld.r3, ifa, 0, regs);
 710
 711                DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
 712
 713        } else if (ld.m) {
 714                unsigned long r2;
 715                int nat_r2;
 716
 717                /*
 718                 * Load +Reg Opcode: ldXZ r1=[r3],r2
 719                 *
 720                 * Note: that we update r3 even in the case of ldfX.a
 721                 * (where the load does not happen)
 722                 *
 723                 * The way the load algorithm works, we know that r3 does not
 724                 * have its NaT bit set (would have gotten NaT consumption
 725                 * before getting the unaligned fault). So we can use ifa
 726                 * which equals r3 at this point.
 727                 *
 728                 * IMPORTANT:
 729                 * The above statement holds ONLY because we know that we
 730                 * never reach this code when trying to do a ldX.s.
 731                 * If we ever make it to here on an ldfX.s then
 732                 */
 733                getreg(ld.imm, &r2, &nat_r2, regs);
 734
 735                ifa += r2;
 736
 737                /*
 738                 * propagate Nat r2 -> r3
 739                 */
 740                setreg(ld.r3, ifa, nat_r2, regs);
 741
 742                DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
 743        }
 744}
 745
 746
 747static int
 748emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 749{
 750        unsigned int len = 1 << ld.x6_sz;
 751        unsigned long val = 0;
 752
 753        /*
 754         * r0, as target, doesn't need to be checked because Illegal Instruction
 755         * faults have higher priority than unaligned faults.
 756         *
 757         * r0 cannot be found as the base as it would never generate an
 758         * unaligned reference.
 759         */
 760
 761        /*
 762         * ldX.a we will emulate load and also invalidate the ALAT entry.
 763         * See comment below for explanation on how we handle ldX.a
 764         */
 765
 766        if (len != 2 && len != 4 && len != 8) {
 767                DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 768                return -1;
 769        }
 770        /* this assumes little-endian byte-order: */
 771        if (copy_from_user(&val, (void __user *) ifa, len))
 772                return -1;
 773        setreg(ld.r1, val, 0, regs);
 774
 775        /*
 776         * check for updates on any kind of loads
 777         */
 778        if (ld.op == 0x5 || ld.m)
 779                emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 780
 781        /*
 782         * handling of various loads (based on EAS2.4):
 783         *
 784         * ldX.acq (ordered load):
 785         *      - acquire semantics would have been used, so force fence instead.
 786         *
 787         * ldX.c.clr (check load and clear):
 788         *      - if we get to this handler, it's because the entry was not in the ALAT.
 789         *        Therefore the operation reverts to a normal load
 790         *
 791         * ldX.c.nc (check load no clear):
 792         *      - same as previous one
 793         *
 794         * ldX.c.clr.acq (ordered check load and clear):
 795         *      - same as above for c.clr part. The load needs to have acquire semantics. So
 796         *        we use the fence semantics which is stronger and thus ensures correctness.
 797         *
 798         * ldX.a (advanced load):
 799         *      - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 800         *        address doesn't match requested size alignment. This means that we would
 801         *        possibly need more than one load to get the result.
 802         *
 803         *        The load part can be handled just like a normal load, however the difficult
 804         *        part is to get the right thing into the ALAT. The critical piece of information
 805         *        in the base address of the load & size. To do that, a ld.a must be executed,
 806         *        clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
 807         *        if we use the same target register, we will be okay for the check.a instruction.
 808         *        If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
 809         *        which would overlap within [r3,r3+X] (the size of the load was store in the
 810         *        ALAT). If such an entry is found the entry is invalidated. But this is not good
 811         *        enough, take the following example:
 812         *              r3=3
 813         *              ld4.a r1=[r3]
 814         *
 815         *        Could be emulated by doing:
 816         *              ld1.a r1=[r3],1
 817         *              store to temporary;
 818         *              ld1.a r1=[r3],1
 819         *              store & shift to temporary;
 820         *              ld1.a r1=[r3],1
 821         *              store & shift to temporary;
 822         *              ld1.a r1=[r3]
 823         *              store & shift to temporary;
 824         *              r1=temporary
 825         *
 826         *        So in this case, you would get the right value is r1 but the wrong info in
 827         *        the ALAT.  Notice that you could do it in reverse to finish with address 3
 828         *        but you would still get the size wrong.  To get the size right, one needs to
 829         *        execute exactly the same kind of load. You could do it from a aligned
 830         *        temporary location, but you would get the address wrong.
 831         *
 832         *        So no matter what, it is not possible to emulate an advanced load
 833         *        correctly. But is that really critical ?
 834         *
 835         *        We will always convert ld.a into a normal load with ALAT invalidated.  This
 836         *        will enable compiler to do optimization where certain code path after ld.a
 837         *        is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
 838         *
 839         *        If there is a store after the advanced load, one must either do a ld.c.* or
 840         *        chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
 841         *        entry found in ALAT), and that's perfectly ok because:
 842         *
 843         *              - ld.c.*, if the entry is not present a  normal load is executed
 844         *              - chk.a.*, if the entry is not present, execution jumps to recovery code
 845         *
 846         *        In either case, the load can be potentially retried in another form.
 847         *
 848         *        ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
 849         *        up a stale entry later). The register base update MUST also be performed.
 850         */
 851
 852        /*
 853         * when the load has the .acq completer then
 854         * use ordering fence.
 855         */
 856        if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
 857                mb();
 858
 859        /*
 860         * invalidate ALAT entry in case of advanced load
 861         */
 862        if (ld.x6_op == 0x2)
 863                invala_gr(ld.r1);
 864
 865        return 0;
 866}
 867
 868static int
 869emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 870{
 871        unsigned long r2;
 872        unsigned int len = 1 << ld.x6_sz;
 873
 874        /*
 875         * if we get to this handler, Nat bits on both r3 and r2 have already
 876         * been checked. so we don't need to do it
 877         *
 878         * extract the value to be stored
 879         */
 880        getreg(ld.imm, &r2, NULL, regs);
 881
 882        /*
 883         * we rely on the macros in unaligned.h for now i.e.,
 884         * we let the compiler figure out how to read memory gracefully.
 885         *
 886         * We need this switch/case because the way the inline function
 887         * works. The code is optimized by the compiler and looks like
 888         * a single switch/case.
 889         */
 890        DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
 891
 892        if (len != 2 && len != 4 && len != 8) {
 893                DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 894                return -1;
 895        }
 896
 897        /* this assumes little-endian byte-order: */
 898        if (copy_to_user((void __user *) ifa, &r2, len))
 899                return -1;
 900
 901        /*
 902         * stX [r3]=r2,imm(9)
 903         *
 904         * NOTE:
 905         * ld.r3 can never be r0, because r0 would not generate an
 906         * unaligned access.
 907         */
 908        if (ld.op == 0x5) {
 909                unsigned long imm;
 910
 911                /*
 912                 * form imm9: [12:6] contain first 7bits
 913                 */
 914                imm = ld.x << 7 | ld.r1;
 915                /*
 916                 * sign extend (8bits) if m set
 917                 */
 918                if (ld.m) imm |= SIGN_EXT9;
 919                /*
 920                 * ifa == r3 (NaT is necessarily cleared)
 921                 */
 922                ifa += imm;
 923
 924                DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 925
 926                setreg(ld.r3, ifa, 0, regs);
 927        }
 928        /*
 929         * we don't have alat_invalidate_multiple() so we need
 930         * to do the complete flush :-<<
 931         */
 932        ia64_invala();
 933
 934        /*
 935         * stX.rel: use fence instead of release
 936         */
 937        if (ld.x6_op == 0xd)
 938                mb();
 939
 940        return 0;
 941}
 942
 943/*
 944 * floating point operations sizes in bytes
 945 */
 946static const unsigned char float_fsz[4]={
 947        10, /* extended precision (e) */
 948        8,  /* integer (8)            */
 949        4,  /* single precision (s)   */
 950        8   /* double precision (d)   */
 951};
 952
 953static inline void
 954mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 955{
 956        ia64_ldfe(6, init);
 957        ia64_stop();
 958        ia64_stf_spill(final, 6);
 959}
 960
 961static inline void
 962mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 963{
 964        ia64_ldf8(6, init);
 965        ia64_stop();
 966        ia64_stf_spill(final, 6);
 967}
 968
 969static inline void
 970mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 971{
 972        ia64_ldfs(6, init);
 973        ia64_stop();
 974        ia64_stf_spill(final, 6);
 975}
 976
 977static inline void
 978mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 979{
 980        ia64_ldfd(6, init);
 981        ia64_stop();
 982        ia64_stf_spill(final, 6);
 983}
 984
 985static inline void
 986float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 987{
 988        ia64_ldf_fill(6, init);
 989        ia64_stop();
 990        ia64_stfe(final, 6);
 991}
 992
 993static inline void
 994float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 995{
 996        ia64_ldf_fill(6, init);
 997        ia64_stop();
 998        ia64_stf8(final, 6);
 999}
1000
/*
 * Register image -> memory image, single precision.
 *
 * Refills scratch FP register number 6 from the spilled image at *init
 * with ia64_ldf_fill and then stores it to *final with ia64_stfs.  The
 * ia64_stop() in between separates the fill from the dependent store.
 */
static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);		/* register 6 <- *init (fill) */
	ia64_stop();
	ia64_stfs(final, 6);		/* *final <- register 6 (single) */
}
1008
/*
 * Register image -> memory image, double precision.
 *
 * Refills scratch FP register number 6 from the spilled image at *init
 * with ia64_ldf_fill and then stores it to *final with ia64_stfd.  The
 * ia64_stop() in between separates the fill from the dependent store.
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);		/* register 6 <- *init (fill) */
	ia64_stop();
	ia64_stfd(final, 6);		/* *final <- register 6 (double) */
}
1016
1017static int
1018emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1019{
1020        struct ia64_fpreg fpr_init[2];
1021        struct ia64_fpreg fpr_final[2];
1022        unsigned long len = float_fsz[ld.x6_sz];
1023
1024        /*
1025         * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1026         * higher priority than unaligned faults.
1027         *
1028         * r0 cannot be found as the base as it would never generate an unaligned
1029         * reference.
1030         */
1031
1032        /*
1033         * make sure we get clean buffers
1034         */
1035        memset(&fpr_init, 0, sizeof(fpr_init));
1036        memset(&fpr_final, 0, sizeof(fpr_final));
1037
1038        /*
1039         * ldfpX.a: we don't try to emulate anything but we must
1040         * invalidate the ALAT entry and execute updates, if any.
1041         */
1042        if (ld.x6_op != 0x2) {
1043                /*
1044                 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
1045                 * instruction:
1046                 */
1047                if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1048                    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1049                        return -1;
1050
1051                DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1052                DDUMP("frp_init =", &fpr_init, 2*len);
1053                /*
1054                 * XXX fixme
1055                 * Could optimize inlines by using ldfpX & 2 spills
1056                 */
1057                switch( ld.x6_sz ) {
1058                        case 0:
1059                                mem2float_extended(&fpr_init[0], &fpr_final[0]);
1060                                mem2float_extended(&fpr_init[1], &fpr_final[1]);
1061                                break;
1062                        case 1:
1063                                mem2float_integer(&fpr_init[0], &fpr_final[0]);
1064                                mem2float_integer(&fpr_init[1], &fpr_final[1]);
1065                                break;
1066                        case 2:
1067                                mem2float_single(&fpr_init[0], &fpr_final[0]);
1068                                mem2float_single(&fpr_init[1], &fpr_final[1]);
1069                                break;
1070                        case 3:
1071                                mem2float_double(&fpr_init[0], &fpr_final[0]);
1072                                mem2float_double(&fpr_init[1], &fpr_final[1]);
1073                                break;
1074                }
1075                DDUMP("fpr_final =", &fpr_final, 2*len);
1076                /*
1077                 * XXX fixme
1078                 *
1079                 * A possible optimization would be to drop fpr_final and directly
1080                 * use the storage from the saved context i.e., the actual final
1081                 * destination (pt_regs, switch_stack or thread structure).
1082                 */
1083                setfpreg(ld.r1, &fpr_final[0], regs);
1084                setfpreg(ld.imm, &fpr_final[1], regs);
1085        }
1086
1087        /*
1088         * Check for updates: only immediate updates are available for this
1089         * instruction.
1090         */
1091        if (ld.m) {
1092                /*
1093                 * the immediate is implicit given the ldsz of the operation:
1094                 * single: 8 (2x4) and for  all others it's 16 (2x8)
1095                 */
1096                ifa += len<<1;
1097
1098                /*
1099                 * IMPORTANT:
1100                 * the fact that we force the NaT of r3 to zero is ONLY valid
1101                 * as long as we don't come here with a ldfpX.s.
1102                 * For this reason we keep this sanity check
1103                 */
1104                if (ld.x6_op == 1 || ld.x6_op == 3)
1105                        printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1106                               __FUNCTION__);
1107
1108                setreg(ld.r3, ifa, 0, regs);
1109        }
1110
1111        /*
1112         * Invalidate ALAT entries, if any, for both registers.
1113         */
1114        if (ld.x6_op == 0x2) {
1115                invala_fr(ld.r1);
1116                invala_fr(ld.imm);
1117        }
1118        return 0;
1119}
1120
1121
1122static int
1123emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1124{
1125        struct ia64_fpreg fpr_init;
1126        struct ia64_fpreg fpr_final;
1127        unsigned long len = float_fsz[ld.x6_sz];
1128
1129        /*
1130         * fr0 & fr1 don't need to be checked because Illegal Instruction
1131         * faults have higher priority than unaligned faults.
1132         *
1133         * r0 cannot be found as the base as it would never generate an
1134         * unaligned reference.
1135         */
1136
1137        /*
1138         * make sure we get clean buffers
1139         */
1140        memset(&fpr_init,0, sizeof(fpr_init));
1141        memset(&fpr_final,0, sizeof(fpr_final));
1142
1143        /*
1144         * ldfX.a we don't try to emulate anything but we must
1145         * invalidate the ALAT entry.
1146         * See comments in ldX for descriptions on how the various loads are handled.
1147         */
1148        if (ld.x6_op != 0x2) {
1149                if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1150                        return -1;
1151
1152                DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1153                DDUMP("fpr_init =", &fpr_init, len);
1154                /*
1155                 * we only do something for x6_op={0,8,9}
1156                 */
1157                switch( ld.x6_sz ) {
1158                        case 0:
1159                                mem2float_extended(&fpr_init, &fpr_final);
1160                                break;
1161                        case 1:
1162                                mem2float_integer(&fpr_init, &fpr_final);
1163                                break;
1164                        case 2:
1165                                mem2float_single(&fpr_init, &fpr_final);
1166                                break;
1167                        case 3:
1168                                mem2float_double(&fpr_init, &fpr_final);
1169                                break;
1170                }
1171                DDUMP("fpr_final =", &fpr_final, len);
1172                /*
1173                 * XXX fixme
1174                 *
1175                 * A possible optimization would be to drop fpr_final and directly
1176                 * use the storage from the saved context i.e., the actual final
1177                 * destination (pt_regs, switch_stack or thread structure).
1178                 */
1179                setfpreg(ld.r1, &fpr_final, regs);
1180        }
1181
1182        /*
1183         * check for updates on any loads
1184         */
1185        if (ld.op == 0x7 || ld.m)
1186                emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1187
1188        /*
1189         * invalidate ALAT entry in case of advanced floating point loads
1190         */
1191        if (ld.x6_op == 0x2)
1192                invala_fr(ld.r1);
1193
1194        return 0;
1195}
1196
1197
1198static int
1199emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1200{
1201        struct ia64_fpreg fpr_init;
1202        struct ia64_fpreg fpr_final;
1203        unsigned long len = float_fsz[ld.x6_sz];
1204
1205        /*
1206         * make sure we get clean buffers
1207         */
1208        memset(&fpr_init,0, sizeof(fpr_init));
1209        memset(&fpr_final,0, sizeof(fpr_final));
1210
1211        /*
1212         * if we get to this handler, Nat bits on both r3 and r2 have already
1213         * been checked. so we don't need to do it
1214         *
1215         * extract the value to be stored
1216         */
1217        getfpreg(ld.imm, &fpr_init, regs);
1218        /*
1219         * during this step, we extract the spilled registers from the saved
1220         * context i.e., we refill. Then we store (no spill) to temporary
1221         * aligned location
1222         */
1223        switch( ld.x6_sz ) {
1224                case 0:
1225                        float2mem_extended(&fpr_init, &fpr_final);
1226                        break;
1227                case 1:
1228                        float2mem_integer(&fpr_init, &fpr_final);
1229                        break;
1230                case 2:
1231                        float2mem_single(&fpr_init, &fpr_final);
1232                        break;
1233                case 3:
1234                        float2mem_double(&fpr_init, &fpr_final);
1235                        break;
1236        }
1237        DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1238        DDUMP("fpr_init =", &fpr_init, len);
1239        DDUMP("fpr_final =", &fpr_final, len);
1240
1241        if (copy_to_user((void __user *) ifa, &fpr_final, len))
1242                return -1;
1243
1244        /*
1245         * stfX [r3]=r2,imm(9)
1246         *
1247         * NOTE:
1248         * ld.r3 can never be r0, because r0 would not generate an
1249         * unaligned access.
1250         */
1251        if (ld.op == 0x7) {
1252                unsigned long imm;
1253
1254                /*
1255                 * form imm9: [12:6] contain first 7bits
1256                 */
1257                imm = ld.x << 7 | ld.r1;
1258                /*
1259                 * sign extend (8bits) if m set
1260                 */
1261                if (ld.m)
1262                        imm |= SIGN_EXT9;
1263                /*
1264                 * ifa == r3 (NaT is necessarily cleared)
1265                 */
1266                ifa += imm;
1267
1268                DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1269
1270                setreg(ld.r3, ifa, 0, regs);
1271        }
1272        /*
1273         * we don't have alat_invalidate_multiple() so we need
1274         * to do the complete flush :-<<
1275         */
1276        ia64_invala();
1277
1278        return 0;
1279}
1280
1281/*
1282 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1283 * eventually fix the program.  However, we don't want to do that for every access so we
1284 * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
1285 * either...
1286 */
1287static int
1288within_logging_rate_limit (void)
1289{
1290        static unsigned long count, last_time;
1291
1292        if (jiffies - last_time > 5*HZ)
1293                count = 0;
1294        if (count < 5) {
1295                last_time = jiffies;
1296                count++;
1297                return 1;
1298        }
1299        return 0;
1300
1301}
1302
1303void
1304ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1305{
1306        struct ia64_psr *ipsr = ia64_psr(regs);
1307        mm_segment_t old_fs = get_fs();
1308        unsigned long bundle[2];
1309        unsigned long opcode;
1310        struct siginfo si;
1311        const struct exception_table_entry *eh = NULL;
1312        union {
1313                unsigned long l;
1314                load_store_t insn;
1315        } u;
1316        int ret = -1;
1317
1318        if (ia64_psr(regs)->be) {
1319                /* we don't support big-endian accesses */
1320                die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1321                goto force_sigbus;
1322        }
1323
1324        /*
1325         * Treat kernel accesses for which there is an exception handler entry the same as
1326         * user-level unaligned accesses.  Otherwise, a clever program could trick this
1327         * handler into reading an arbitrary kernel addresses...
1328         */
1329        if (!user_mode(regs))
1330                eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1331        if (user_mode(regs) || eh) {
1332                if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1333                        goto force_sigbus;
1334
1335                if (!no_unaligned_warning &&
1336                    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
1337                    within_logging_rate_limit())
1338                {
1339                        char buf[200];  /* comm[] is at most 16 bytes... */
1340                        size_t len;
1341
1342                        len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1343                                      "ip=0x%016lx\n\r", current->comm, current->pid,
1344                                      ifa, regs->cr_iip + ipsr->ri);
1345                        /*
1346                         * Don't call tty_write_message() if we're in the kernel; we might
1347                         * be holding locks...
1348                         */
1349                        if (user_mode(regs))
1350                                tty_write_message(current->signal->tty, buf);
1351                        buf[len-1] = '\0';      /* drop '\r' */
1352                        /* watch for command names containing %s */
1353                        printk(KERN_WARNING "%s", buf);
1354                } else {
1355                        if (no_unaligned_warning && !noprint_warning) {
1356                                noprint_warning = 1;
1357                                printk(KERN_WARNING "%s(%d) encountered an "
1358                                       "unaligned exception which required\n"
1359                                       "kernel assistance, which degrades "
1360                                       "the performance of the application.\n"
1361                                       "Unaligned exception warnings have "
1362                                       "been disabled by the system "
1363                                       "administrator\n"
1364                                       "echo 0 > /proc/sys/kernel/ignore-"
1365                                       "unaligned-usertrap to re-enable\n",
1366                                       current->comm, current->pid);
1367                        }
1368                }
1369        } else {
1370                if (within_logging_rate_limit())
1371                        printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1372                               ifa, regs->cr_iip + ipsr->ri);
1373                set_fs(KERNEL_DS);
1374        }
1375
1376        DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1377               regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1378
1379        if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1380                goto failure;
1381
1382        /*
1383         * extract the instruction from the bundle given the slot number
1384         */
1385        switch (ipsr->ri) {
1386              case 0: u.l = (bundle[0] >>  5); break;
1387              case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1388              case 2: u.l = (bundle[1] >> 23); break;
1389        }
1390        opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1391
1392        DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1393               "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1394               u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1395
1396        /*
1397         * IMPORTANT:
1398         * Notice that the switch statement DOES not cover all possible instructions
1399         * that DO generate unaligned references. This is made on purpose because for some
1400         * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1401         * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1402         * the program will get a signal and die:
1403         *
1404         *      load/store:
1405         *              - ldX.spill
1406         *              - stX.spill
1407         *      Reason: RNATs are based on addresses
1408         *              - ld16
1409         *              - st16
1410         *      Reason: ld16 and st16 are supposed to occur in a single
1411         *              memory op
1412         *
1413         *      synchronization:
1414         *              - cmpxchg
1415         *              - fetchadd
1416         *              - xchg
1417         *      Reason: ATOMIC operations cannot be emulated properly using multiple
1418         *              instructions.
1419         *
1420         *      speculative loads:
1421         *              - ldX.sZ
1422         *      Reason: side effects, code must be ready to deal with failure so simpler
1423         *              to let the load fail.
1424         * ---------------------------------------------------------------------------------
1425         * XXX fixme
1426         *
1427         * I would like to get rid of this switch case and do something
1428         * more elegant.
1429         */
1430        switch (opcode) {
1431              case LDS_OP:
1432              case LDSA_OP:
1433                if (u.insn.x)
1434                        /* oops, really a semaphore op (cmpxchg, etc) */
1435                        goto failure;
1436                /* no break */
1437              case LDS_IMM_OP:
1438              case LDSA_IMM_OP:
1439              case LDFS_OP:
1440              case LDFSA_OP:
1441              case LDFS_IMM_OP:
1442                /*
1443                 * The instruction will be retried with deferred exceptions turned on, and
1444                 * we should get Nat bit installed
1445                 *
1446                 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1447                 * are actually executed even though the operation failed. So we don't
1448                 * need to take care of this.
1449                 */
1450                DPRINT("forcing PSR_ED\n");
1451                regs->cr_ipsr |= IA64_PSR_ED;
1452                goto done;
1453
1454              case LD_OP:
1455              case LDA_OP:
1456              case LDBIAS_OP:
1457              case LDACQ_OP:
1458              case LDCCLR_OP:
1459              case LDCNC_OP:
1460              case LDCCLRACQ_OP:
1461                if (u.insn.x)
1462                        /* oops, really a semaphore op (cmpxchg, etc) */
1463                        goto failure;
1464                /* no break */
1465              case LD_IMM_OP:
1466              case LDA_IMM_OP:
1467              case LDBIAS_IMM_OP:
1468              case LDACQ_IMM_OP:
1469              case LDCCLR_IMM_OP:
1470              case LDCNC_IMM_OP:
1471              case LDCCLRACQ_IMM_OP:
1472                ret = emulate_load_int(ifa, u.insn, regs);
1473                break;
1474
1475              case ST_OP:
1476              case STREL_OP:
1477                if (u.insn.x)
1478                        /* oops, really a semaphore op (cmpxchg, etc) */
1479                        goto failure;
1480                /* no break */
1481              case ST_IMM_OP:
1482              case STREL_IMM_OP:
1483                ret = emulate_store_int(ifa, u.insn, regs);
1484                break;
1485
1486              case LDF_OP:
1487              case LDFA_OP:
1488              case LDFCCLR_OP:
1489              case LDFCNC_OP:
1490              case LDF_IMM_OP:
1491              case LDFA_IMM_OP:
1492              case LDFCCLR_IMM_OP:
1493              case LDFCNC_IMM_OP:
1494                if (u.insn.x)
1495                        ret = emulate_load_floatpair(ifa, u.insn, regs);
1496                else
1497                        ret = emulate_load_float(ifa, u.insn, regs);
1498                break;
1499
1500              case STF_OP:
1501              case STF_IMM_OP:
1502                ret = emulate_store_float(ifa, u.insn, regs);
1503                break;
1504
1505              default:
1506                goto failure;
1507        }
1508        DPRINT("ret=%d\n", ret);
1509        if (ret)
1510                goto failure;
1511
1512        if (ipsr->ri == 2)
1513                /*
1514                 * given today's architecture this case is not likely to happen because a
1515                 * memory access instruction (M) can never be in the last slot of a
1516                 * bundle. But let's keep it for now.
1517                 */
1518                regs->cr_iip += 16;
1519        ipsr->ri = (ipsr->ri + 1) & 0x3;
1520
1521        DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1522  done:
1523        set_fs(old_fs);         /* restore original address limit */
1524        return;
1525
1526  failure:
1527        /* something went wrong... */
1528        if (!user_mode(regs)) {
1529                if (eh) {
1530                        ia64_handle_exception(regs, eh);
1531                        goto done;
1532                }
1533                die_if_kernel("error during unaligned kernel access\n", regs, ret);
1534                /* NOT_REACHED */
1535        }
1536  force_sigbus:
1537        si.si_signo = SIGBUS;
1538        si.si_errno = 0;
1539        si.si_code = BUS_ADRALN;
1540        si.si_addr = (void __user *) ifa;
1541        si.si_flags = 0;
1542        si.si_isr = 0;
1543        si.si_imm = 0;
1544        force_sig_info(SIGBUS, &si, current);
1545        goto done;
1546}
1547
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.