linux-old/arch/ia64/kernel/unaligned.c
<<
>>
Prefs
   1/*
   2 * Architecture-specific unaligned trap handling.
   3 *
   4 * Copyright (C) 1999-2002 Hewlett-Packard Co
   5 *      Stephane Eranian <eranian@hpl.hp.com>
   6 *      David Mosberger-Tang <davidm@hpl.hp.com>
   7 *
   8 * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
   9 *              get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  10 *              stacked register returns an undefined value; it does NOT trigger a
  11 *              "rsvd register fault").
  12 * 2001/10/11   Fix unaligned access to rotating registers in s/w pipelined loops.
  13 * 2001/08/13   Correct size of extended floats (float_fsz) from 16 to 10 bytes.
   14 * 2001/01/17   Add support for emulation of unaligned kernel accesses.
  15 */
  16#include <linux/kernel.h>
  17#include <linux/sched.h>
  18#include <linux/smp_lock.h>
  19
  20#include <asm/uaccess.h>
  21#include <asm/rse.h>
  22#include <asm/processor.h>
  23#include <asm/unaligned.h>
  24
  25extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
  26
  27#undef DEBUG_UNALIGNED_TRAP
  28
  29#ifdef DEBUG_UNALIGNED_TRAP
  30# define DPRINT(a...)   do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
  31# define DDUMP(str,vp,len)      dump(str, vp, len)
  32
  33static void
  34dump (const char *str, void *vp, size_t len)
  35{
  36        unsigned char *cp = vp;
  37        int i;
  38
  39        printk("%s", str);
  40        for (i = 0; i < len; ++i)
  41                printk (" %02x", *cp++);
  42        printk("\n");
  43}
  44#else
  45# define DPRINT(a...)
  46# define DDUMP(str,vp,len)
  47#endif
  48
  49#define IA64_FIRST_STACKED_GR   32
  50#define IA64_FIRST_ROTATING_FR  32
  51#define SIGN_EXT9               0xffffffffffffff00ul
  52
  53/*
  54 * For M-unit:
  55 *
  56 *  opcode |   m  |   x6    |
  57 * --------|------|---------|
  58 * [40-37] | [36] | [35:30] |
  59 * --------|------|---------|
  60 *     4   |   1  |    6    | = 11 bits
  61 * --------------------------
  62 * However bits [31:30] are not directly useful to distinguish between
  63 * load/store so we can use [35:32] instead, which gives the following
  64 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
  65 * checking the m-bit until later in the load/store emulation.
  66 */
  67#define IA64_OPCODE_MASK        0x1ef
  68#define IA64_OPCODE_SHIFT       32
  69
  70/*
  71 * Table C-28 Integer Load/Store
  72 *
  73 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  74 *
  75 * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
  76 * the address (bits [8:3]), so we must failed.
  77 */
  78#define LD_OP            0x080
  79#define LDS_OP           0x081
  80#define LDA_OP           0x082
  81#define LDSA_OP          0x083
  82#define LDBIAS_OP        0x084
  83#define LDACQ_OP         0x085
  84/* 0x086, 0x087 are not relevant */
  85#define LDCCLR_OP        0x088
  86#define LDCNC_OP         0x089
  87#define LDCCLRACQ_OP     0x08a
  88#define ST_OP            0x08c
  89#define STREL_OP         0x08d
  90/* 0x08e,0x8f are not relevant */
  91
  92/*
  93 * Table C-29 Integer Load +Reg
  94 *
  95 * we use the ld->m (bit [36:36]) field to determine whether or not we have
  96 * a load/store of this form.
  97 */
  98
  99/*
 100 * Table C-30 Integer Load/Store +Imm
 101 *
 102 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 103 *
 104 * ld8.fill, st8.fill  must be aligned because the Nat register are based on
 105 * the address, so we must fail and the program must be fixed.
 106 */
 107#define LD_IMM_OP            0x0a0
 108#define LDS_IMM_OP           0x0a1
 109#define LDA_IMM_OP           0x0a2
 110#define LDSA_IMM_OP          0x0a3
 111#define LDBIAS_IMM_OP        0x0a4
 112#define LDACQ_IMM_OP         0x0a5
 113/* 0x0a6, 0xa7 are not relevant */
 114#define LDCCLR_IMM_OP        0x0a8
 115#define LDCNC_IMM_OP         0x0a9
 116#define LDCCLRACQ_IMM_OP     0x0aa
 117#define ST_IMM_OP            0x0ac
 118#define STREL_IMM_OP         0x0ad
 119/* 0x0ae,0xaf are not relevant */
 120
 121/*
 122 * Table C-32 Floating-point Load/Store
 123 */
 124#define LDF_OP           0x0c0
 125#define LDFS_OP          0x0c1
 126#define LDFA_OP          0x0c2
 127#define LDFSA_OP         0x0c3
 128/* 0x0c6 is irrelevant */
 129#define LDFCCLR_OP       0x0c8
 130#define LDFCNC_OP        0x0c9
 131/* 0x0cb is irrelevant  */
 132#define STF_OP           0x0cc
 133
 134/*
 135 * Table C-33 Floating-point Load +Reg
 136 *
 137 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 138 * a load/store of this form.
 139 */
 140
 141/*
 142 * Table C-34 Floating-point Load/Store +Imm
 143 */
 144#define LDF_IMM_OP       0x0e0
 145#define LDFS_IMM_OP      0x0e1
 146#define LDFA_IMM_OP      0x0e2
 147#define LDFSA_IMM_OP     0x0e3
 148/* 0x0e6 is irrelevant */
 149#define LDFCCLR_IMM_OP   0x0e8
 150#define LDFCNC_IMM_OP    0x0e9
 151#define STF_IMM_OP       0x0ec
 152
 153typedef struct {
 154        unsigned long    qp:6;  /* [0:5]   */
 155        unsigned long    r1:7;  /* [6:12]  */
 156        unsigned long   imm:7;  /* [13:19] */
 157        unsigned long    r3:7;  /* [20:26] */
 158        unsigned long     x:1;  /* [27:27] */
 159        unsigned long  hint:2;  /* [28:29] */
 160        unsigned long x6_sz:2;  /* [30:31] */
 161        unsigned long x6_op:4;  /* [32:35], x6 = x6_sz|x6_op */
 162        unsigned long     m:1;  /* [36:36] */
 163        unsigned long    op:4;  /* [37:40] */
 164        unsigned long   pad:23; /* [41:63] */
 165} load_store_t;
 166
 167
 168typedef enum {
 169        UPD_IMMEDIATE,  /* ldXZ r1=[r3],imm(9) */
 170        UPD_REG         /* ldXZ r1=[r3],r2     */
 171} update_t;
 172
 173/*
 174 * We use tables to keep track of the offsets of registers in the saved state.
 175 * This way we save having big switch/case statements.
 176 *
 177 * We use bit 0 to indicate switch_stack or pt_regs.
 178 * The offset is simply shifted by 1 bit.
 179 * A 2-byte value should be enough to hold any kind of offset
 180 *
 181 * In case the calling convention changes (and thus pt_regs/switch_stack)
 182 * simply use RSW instead of RPT or vice-versa.
 183 */
 184
 185#define RPO(x)  ((size_t) &((struct pt_regs *)0)->x)
 186#define RSO(x)  ((size_t) &((struct switch_stack *)0)->x)
 187
 188#define RPT(x)          (RPO(x) << 1)
 189#define RSW(x)          (1| RSO(x)<<1)
 190
 191#define GR_OFFS(x)      (gr_info[x]>>1)
 192#define GR_IN_SW(x)     (gr_info[x] & 0x1)
 193
 194#define FR_OFFS(x)      (fr_info[x]>>1)
 195#define FR_IN_SW(x)     (fr_info[x] & 0x1)
 196
 197static u16 gr_info[32]={
 198        0,                      /* r0 is read-only : WE SHOULD NEVER GET THIS */
 199
 200        RPT(r1), RPT(r2), RPT(r3),
 201
 202        RSW(r4), RSW(r5), RSW(r6), RSW(r7),
 203
 204        RPT(r8), RPT(r9), RPT(r10), RPT(r11),
 205        RPT(r12), RPT(r13), RPT(r14), RPT(r15),
 206
 207        RPT(r16), RPT(r17), RPT(r18), RPT(r19),
 208        RPT(r20), RPT(r21), RPT(r22), RPT(r23),
 209        RPT(r24), RPT(r25), RPT(r26), RPT(r27),
 210        RPT(r28), RPT(r29), RPT(r30), RPT(r31)
 211};
 212
 213static u16 fr_info[32]={
 214        0,                      /* constant : WE SHOULD NEVER GET THIS */
 215        0,                      /* constant : WE SHOULD NEVER GET THIS */
 216
 217        RSW(f2), RSW(f3), RSW(f4), RSW(f5),
 218
 219        RPT(f6), RPT(f7), RPT(f8), RPT(f9),
 220        RPT(f10), RPT(f11),
 221
 222        RSW(f12), RSW(f13), RSW(f14),
 223        RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
 224        RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
 225        RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
 226        RSW(f30), RSW(f31)
 227};
 228
 229/* Invalidate ALAT entry for integer register REGNO.  */
 230static void
 231invala_gr (int regno)
 232{
 233#       define F(reg)   case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
 234
 235        switch (regno) {
 236                F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 237                F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 238                F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 239                F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 240                F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 241                F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 242                F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 243                F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 244                F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 245                F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 246                F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 247                F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 248                F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 249                F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 250                F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 251                F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 252        }
 253#       undef F
 254}
 255
 256/* Invalidate ALAT entry for floating-point register REGNO.  */
 257static void
 258invala_fr (int regno)
 259{
 260#       define F(reg)   case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
 261
 262        switch (regno) {
 263                F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 264                F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 265                F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 266                F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 267                F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 268                F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 269                F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 270                F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 271                F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 272                F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 273                F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 274                F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 275                F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 276                F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 277                F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 278                F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 279        }
 280#       undef F
 281}
 282
 283static inline unsigned long
 284rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
 285{
 286        reg += rrb;
 287        if (reg >= sor)
 288                reg -= sor;
 289        return reg;
 290}
 291
 292static void
 293set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
 294{
 295        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 296        unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 297        unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 298        unsigned long rnats, nat_mask;
 299        unsigned long on_kbs;
 300        long sof = (regs->cr_ifs) & 0x7f;
 301        long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 302        long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 303        long ridx = r1 - 32;
 304
 305        if (ridx >= sof) {
 306                /* this should never happen, as the "rsvd register fault" has higher priority */
 307                DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 308                return;
 309        }
 310
 311        if (ridx < sor)
 312                ridx = rotate_reg(sor, rrb_gr, ridx);
 313
 314        DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 315               r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 316
 317        on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 318        addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 319        if (addr >= kbs) {
 320                /* the register is on the kernel backing store: easy... */
 321                rnat_addr = ia64_rse_rnat_addr(addr);
 322                if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 323                        rnat_addr = &sw->ar_rnat;
 324                nat_mask = 1UL << ia64_rse_slot_num(addr);
 325
 326                *addr = val;
 327                if (nat)
 328                        *rnat_addr |=  nat_mask;
 329                else
 330                        *rnat_addr &= ~nat_mask;
 331                return;
 332        }
 333
 334        /*
 335         * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
 336         * infer whether the interrupt task was running on the kernel backing store.
 337         */
 338        if (regs->r12 >= TASK_SIZE) {
 339                DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1);
 340                return;
 341        }
 342
 343        bspstore = (unsigned long *)regs->ar_bspstore;
 344        ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 345        bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 346        addr    = ia64_rse_skip_regs(bsp, ridx);
 347
 348        DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 349
 350        ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 351
 352        rnat_addr = ia64_rse_rnat_addr(addr);
 353
 354        ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 355        DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
 356               (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
 357
 358        nat_mask = 1UL << ia64_rse_slot_num(addr);
 359        if (nat)
 360                rnats |=  nat_mask;
 361        else
 362                rnats &= ~nat_mask;
 363        ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
 364
 365        DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 366}
 367
 368
 369static void
 370get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
 371{
 372        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 373        unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 374        unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 375        unsigned long rnats, nat_mask;
 376        unsigned long on_kbs;
 377        long sof = (regs->cr_ifs) & 0x7f;
 378        long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 379        long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 380        long ridx = r1 - 32;
 381
 382        if (ridx >= sof) {
 383                /* read of out-of-frame register returns an undefined value; 0 in our case.  */
 384                DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 385                goto fail;
 386        }
 387
 388        if (ridx < sor)
 389                ridx = rotate_reg(sor, rrb_gr, ridx);
 390
 391        DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 392               r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 393
 394        on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 395        addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 396        if (addr >= kbs) {
 397                /* the register is on the kernel backing store: easy... */
 398                *val = *addr;
 399                if (nat) {
 400                        rnat_addr = ia64_rse_rnat_addr(addr);
 401                        if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 402                                rnat_addr = &sw->ar_rnat;
 403                        nat_mask = 1UL << ia64_rse_slot_num(addr);
 404                        *nat = (*rnat_addr & nat_mask) != 0;
 405                }
 406                return;
 407        }
 408
 409        /*
 410         * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
 411         * infer whether the interrupt task was running on the kernel backing store.
 412         */
 413        if (regs->r12 >= TASK_SIZE) {
 414                DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
 415                goto fail;
 416        }
 417
 418        bspstore = (unsigned long *)regs->ar_bspstore;
 419        ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 420        bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 421        addr    = ia64_rse_skip_regs(bsp, ridx);
 422
 423        DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 424
 425        ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 426
 427        if (nat) {
 428                rnat_addr = ia64_rse_rnat_addr(addr);
 429                nat_mask = 1UL << ia64_rse_slot_num(addr);
 430
 431                DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 432
 433                ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 434                *nat = (rnats & nat_mask) != 0;
 435        }
 436        return;
 437
 438  fail:
 439        *val = 0;
 440        if (nat)
 441                *nat = 0;
 442        return;
 443}
 444
 445
 446static void
 447setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
 448{
 449        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 450        unsigned long addr;
 451        unsigned long bitmask;
 452        unsigned long *unat;
 453
 454        /*
 455         * First takes care of stacked registers
 456         */
 457        if (regnum >= IA64_FIRST_STACKED_GR) {
 458                set_rse_reg(regs, regnum, val, nat);
 459                return;
 460        }
 461
 462        /*
 463         * Using r0 as a target raises a General Exception fault which has higher priority
 464         * than the Unaligned Reference fault.
 465         */
 466
 467        /*
 468         * Now look at registers in [0-31] range and init correct UNAT
 469         */
 470        if (GR_IN_SW(regnum)) {
 471                addr = (unsigned long)sw;
 472                unat = &sw->ar_unat;
 473        } else {
 474                addr = (unsigned long)regs;
 475                unat = &sw->caller_unat;
 476        }
 477        DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
 478               addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
 479        /*
 480         * add offset from base of struct
 481         * and do it !
 482         */
 483        addr += GR_OFFS(regnum);
 484
 485        *(unsigned long *)addr = val;
 486
 487        /*
 488         * We need to clear the corresponding UNAT bit to fully emulate the load
 489         * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
 490         */
 491        bitmask   = 1UL << (addr >> 3 & 0x3f);
 492        DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
 493        if (nat) {
 494                *unat |= bitmask;
 495        } else {
 496                *unat &= ~bitmask;
 497        }
 498        DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 499}
 500
 501/*
 502 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 503 * range from 32-127, result is in the range from 0-95.
 504 */
 505static inline unsigned long
 506fph_index (struct pt_regs *regs, long regnum)
 507{
 508        unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
 509        return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 510}
 511
 512static void
 513setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 514{
 515        struct switch_stack *sw = (struct switch_stack *)regs - 1;
 516        unsigned long addr;
 517
 518        /*
 519         * From EAS-2.5: FPDisableFault has higher priority than Unaligned
 520         * Fault. Thus, when we get here, we know the partition is enabled.
 521         * To update f32-f127, there are three choices:
 522         *
 523         *      (1) save f32-f127 to thread.fph and update the values there
 524         *      (2) use a gigantic switch statement to directly access the registers
 525         *      (3) generate code on the fly to update the desired register
 526         *
 527         * For now, we are using approach (1).
 528         */
 529        if (regnum >= IA64_FIRST_ROTATING_FR) {
 530                ia64_sync_fph(current);
 531                current->thread.fph[fph_index(regs, regnum)] = *fpval;
 532        } else {
 533                /*
 534                 * pt_regs or switch_stack ?
 535                 */
 536                if (FR_IN_SW(regnum)) {
 537                        addr = (unsigned long)sw;
 538                } else {
 539                        addr = (unsigned long)regs;
 540                }
 541
 542                DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
 543
 544                addr += FR_OFFS(regnum);
 545                *(struct ia64_fpreg *)addr = *fpval;
 546
 547                /*
 548                 * mark the low partition as being used now
 549                 *
 550                 * It is highly unlikely that this bit is not already set, but
 551                 * let's do it for safety.
 552                 */
 553                regs->cr_ipsr |= IA64_PSR_MFL;
 554        }
 555}
 556
 557/*
 558 * Those 2 inline functions generate the spilled versions of the constant floating point
 559 * registers which can be used with stfX
 560 */
 561static inline void
 562float_spill_f0 (struct ia64_fpreg *final)
 563{
 564        __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
 565}
 566
 567static inline void
 568float_spill_f1 (struct ia64_fpreg *final)
 569{
 570        __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
 571}
 572
 573static void
 574getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 575{
 576        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 577        unsigned long addr;
 578
 579        /*
 580         * From EAS-2.5: FPDisableFault has higher priority than
 581         * Unaligned Fault. Thus, when we get here, we know the partition is
 582         * enabled.
 583         *
 584         * When regnum > 31, the register is still live and we need to force a save
 585         * to current->thread.fph to get access to it.  See discussion in setfpreg()
 586         * for reasons and other ways of doing this.
 587         */
 588        if (regnum >= IA64_FIRST_ROTATING_FR) {
 589                ia64_flush_fph(current);
 590                *fpval = current->thread.fph[fph_index(regs, regnum)];
 591        } else {
 592                /*
 593                 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
 594                 * not saved, we must generate their spilled form on the fly
 595                 */
 596                switch(regnum) {
 597                case 0:
 598                        float_spill_f0(fpval);
 599                        break;
 600                case 1:
 601                        float_spill_f1(fpval);
 602                        break;
 603                default:
 604                        /*
 605                         * pt_regs or switch_stack ?
 606                         */
 607                        addr =  FR_IN_SW(regnum) ? (unsigned long)sw
 608                                                 : (unsigned long)regs;
 609
 610                        DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
 611                               FR_IN_SW(regnum), addr, FR_OFFS(regnum));
 612
 613                        addr  += FR_OFFS(regnum);
 614                        *fpval = *(struct ia64_fpreg *)addr;
 615                }
 616        }
 617}
 618
 619
 620static void
 621getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
 622{
 623        struct switch_stack *sw = (struct switch_stack *) regs - 1;
 624        unsigned long addr, *unat;
 625
 626        if (regnum >= IA64_FIRST_STACKED_GR) {
 627                get_rse_reg(regs, regnum, val, nat);
 628                return;
 629        }
 630
 631        /*
 632         * take care of r0 (read-only always evaluate to 0)
 633         */
 634        if (regnum == 0) {
 635                *val = 0;
 636                if (nat)
 637                        *nat = 0;
 638                return;
 639        }
 640
 641        /*
 642         * Now look at registers in [0-31] range and init correct UNAT
 643         */
 644        if (GR_IN_SW(regnum)) {
 645                addr = (unsigned long)sw;
 646                unat = &sw->ar_unat;
 647        } else {
 648                addr = (unsigned long)regs;
 649                unat = &sw->caller_unat;
 650        }
 651
 652        DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
 653
 654        addr += GR_OFFS(regnum);
 655
 656        *val  = *(unsigned long *)addr;
 657
 658        /*
 659         * do it only when requested
 660         */
 661        if (nat)
 662                *nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
 663}
 664
 665static void
 666emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
 667{
 668        /*
 669         * IMPORTANT:
 670         * Given the way we handle unaligned speculative loads, we should
 671         * not get to this point in the code but we keep this sanity check,
 672         * just in case.
 673         */
 674        if (ld.x6_op == 1 || ld.x6_op == 3) {
 675                printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
 676                die_if_kernel("unaligned reference on speculative load with register update\n",
 677                              regs, 30);
 678        }
 679
 680
 681        /*
 682         * at this point, we know that the base register to update is valid i.e.,
 683         * it's not r0
 684         */
 685        if (type == UPD_IMMEDIATE) {
 686                unsigned long imm;
 687
 688                /*
 689                 * Load +Imm: ldXZ r1=[r3],imm(9)
 690                 *
 691                 *
 692                 * form imm9: [13:19] contain the first 7 bits
 693                 */
 694                imm = ld.x << 7 | ld.imm;
 695
 696                /*
 697                 * sign extend (1+8bits) if m set
 698                 */
 699                if (ld.m) imm |= SIGN_EXT9;
 700
 701                /*
 702                 * ifa == r3 and we know that the NaT bit on r3 was clear so
 703                 * we can directly use ifa.
 704                 */
 705                ifa += imm;
 706
 707                setreg(ld.r3, ifa, 0, regs);
 708
 709                DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
 710
 711        } else if (ld.m) {
 712                unsigned long r2;
 713                int nat_r2;
 714
 715                /*
 716                 * Load +Reg Opcode: ldXZ r1=[r3],r2
 717                 *
 718                 * Note: that we update r3 even in the case of ldfX.a
 719                 * (where the load does not happen)
 720                 *
 721                 * The way the load algorithm works, we know that r3 does not
 722                 * have its NaT bit set (would have gotten NaT consumption
 723                 * before getting the unaligned fault). So we can use ifa
 724                 * which equals r3 at this point.
 725                 *
 726                 * IMPORTANT:
 727                 * The above statement holds ONLY because we know that we
 728                 * never reach this code when trying to do a ldX.s.
 729                 * If we ever make it to here on an ldfX.s then
 730                 */
 731                getreg(ld.imm, &r2, &nat_r2, regs);
 732
 733                ifa += r2;
 734
 735                /*
 736                 * propagate Nat r2 -> r3
 737                 */
 738                setreg(ld.r3, ifa, nat_r2, regs);
 739
 740                DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
 741        }
 742}
 743
 744
 745static int
 746emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 747{
 748        unsigned int len = 1 << ld.x6_sz;
 749
 750        /*
 751         * r0, as target, doesn't need to be checked because Illegal Instruction
 752         * faults have higher priority than unaligned faults.
 753         *
 754         * r0 cannot be found as the base as it would never generate an
 755         * unaligned reference.
 756         */
 757
 758        /*
 759         * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
 760         * See comment below for explanation on how we handle ldX.a
 761         */
 762        if (ld.x6_op != 0x2) {
 763                unsigned long val = 0;
 764
 765                if (len != 2 && len != 4 && len != 8) {
 766                        DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 767                        return -1;
 768                }
 769                /* this assumes little-endian byte-order: */
 770                if (copy_from_user(&val, (void *) ifa, len))
 771                    return -1;
 772                setreg(ld.r1, val, 0, regs);
 773        }
 774
 775        /*
 776         * check for updates on any kind of loads
 777         */
 778        if (ld.op == 0x5 || ld.m)
 779                emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 780
 781        /*
 782         * handling of various loads (based on EAS2.4):
 783         *
 784         * ldX.acq (ordered load):
 785         *      - acquire semantics would have been used, so force fence instead.
 786         *
 787         * ldX.c.clr (check load and clear):
 788         *      - if we get to this handler, it's because the entry was not in the ALAT.
 789         *        Therefore the operation reverts to a normal load
 790         *
 791         * ldX.c.nc (check load no clear):
 792         *      - same as previous one
 793         *
 794         * ldX.c.clr.acq (ordered check load and clear):
 795         *      - same as above for c.clr part. The load needs to have acquire semantics. So
 796         *        we use the fence semantics which is stronger and thus ensures correctness.
 797         *
 798         * ldX.a (advanced load):
 799         *      - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 800         *        address doesn't match requested size alignement. This means that we would
 801         *        possibly need more than one load to get the result.
 802         *
 803         *        The load part can be handled just like a normal load, however the difficult
 804         *        part is to get the right thing into the ALAT. The critical piece of information
 805         *        in the base address of the load & size. To do that, a ld.a must be executed,
 806         *        clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
 807         *        if we use the same target register, we will be okay for the check.a instruction.
 808         *        If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
 809         *        which would overlap within [r3,r3+X] (the size of the load was stored in the
 810         *        ALAT). If such an entry is found the entry is invalidated. But this is not good
 811         *        enough, take the following example:
 812         *              r3=3
 813         *              ld4.a r1=[r3]
 814         *
 815         *        Could be emulated by doing:
 816         *              ld1.a r1=[r3],1
 817         *              store to temporary;
 818         *              ld1.a r1=[r3],1
 819         *              store & shift to temporary;
 820         *              ld1.a r1=[r3],1
 821         *              store & shift to temporary;
 822         *              ld1.a r1=[r3]
 823         *              store & shift to temporary;
 824         *              r1=temporary
 825         *
 826         *        So in this case, you would get the right value in r1 but the wrong info in
 827         *        the ALAT.  Notice that you could do it in reverse to finish with address 3
 828         *        but you would still get the size wrong.  To get the size right, one needs to
 829         *        execute exactly the same kind of load. You could do it from a aligned
 830         *        temporary location, but you would get the address wrong.
 831         *
 832         *        So no matter what, it is not possible to emulate an advanced load
 833         *        correctly. But is that really critical ?
 834         *
 835         *
 836         *        Now one has to look at how ld.a is used, one must either do a ld.c.* or
 837         *        chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
 838         *        entry found in ALAT), and that's perfectly ok because:
 839         *
 840         *              - ld.c.*, if the entry is not present a  normal load is executed
 841         *              - chk.a.*, if the entry is not present, execution jumps to recovery code
 842         *
 843         *        In either case, the load can be potentially retried in another form.
 844         *
 845         *        So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
 846         *        must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up
 847         *        a stale entry later) The register base update MUST also be performed.
 848         *
 849         *        Now what is the content of the register and its NaT bit in the case we don't
 850         *        do the load ?  EAS2.4, says (in case an actual load is needed)
 851         *
 852         *              - r1 = [r3], Nat = 0 if succeeds
 853         *              - r1 = 0 Nat = 0 if trying to access non-speculative memory
 854         *
 855         *        For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
 856         *        retry and thus eventually reload the register thereby changing Nat and
 857         *        register content.
 858         */
 859
 860        /*
 861         * when the load has the .acq completer then
 862         * use ordering fence.
 863         */
 864        if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
 865                mb();
 866
 867        /*
 868         * invalidate ALAT entry in case of advanced load
 869         */
 870        if (ld.x6_op == 0x2)
 871                invala_gr(ld.r1);
 872
 873        return 0;
 874}
 875
 876static int
 877emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 878{
 879        unsigned long r2;
 880        unsigned int len = 1 << ld.x6_sz;
 881
 882        /*
 883         * if we get to this handler, Nat bits on both r3 and r2 have already
 884         * been checked. so we don't need to do it
 885         *
 886         * extract the value to be stored
 887         */
 888        getreg(ld.imm, &r2, 0, regs);
 889
 890        /*
 891         * we rely on the macros in unaligned.h for now i.e.,
 892         * we let the compiler figure out how to read memory gracefully.
 893         *
 894         * We need this switch/case because the way the inline function
 895         * works. The code is optimized by the compiler and looks like
 896         * a single switch/case.
 897         */
 898        DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
 899
 900        if (len != 2 && len != 4 && len != 8) {
 901                DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 902                return -1;
 903        }
 904
 905        /* this assumes little-endian byte-order: */
 906        if (copy_to_user((void *) ifa, &r2, len))
 907                return -1;
 908
 909        /*
 910         * stX [r3]=r2,imm(9)
 911         *
 912         * NOTE:
 913         * ld.r3 can never be r0, because r0 would not generate an
 914         * unaligned access.
 915         */
 916        if (ld.op == 0x5) {
 917                unsigned long imm;
 918
 919                /*
 920                 * form imm9: [12:6] contain first 7bits
 921                 */
 922                imm = ld.x << 7 | ld.r1;
 923                /*
 924                 * sign extend (8bits) if m set
 925                 */
 926                if (ld.m) imm |= SIGN_EXT9;
 927                /*
 928                 * ifa == r3 (NaT is necessarily cleared)
 929                 */
 930                ifa += imm;
 931
 932                DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 933
 934                setreg(ld.r3, ifa, 0, regs);
 935        }
 936        /*
 937         * we don't have alat_invalidate_multiple() so we need
 938         * to do the complete flush :-<<
 939         */
 940        ia64_invala();
 941
 942        /*
 943         * stX.rel: use fence instead of release
 944         */
 945        if (ld.x6_op == 0xd)
 946                mb();
 947
 948        return 0;
 949}
 950
 951/*
 952 * floating point operations sizes in bytes
 953 */
 954static const unsigned char float_fsz[4]={
 955        10, /* extended precision (e) */
 956        8,  /* integer (8)            */
 957        4,  /* single precision (s)   */
 958        8   /* double precision (d)   */
 959};
 960
 961static inline void
 962mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 963{
 964        __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
 965                              :: "r"(init), "r"(final) : "f6","memory");
 966}
 967
 968static inline void
 969mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 970{
 971        __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
 972                              :: "r"(init), "r"(final) : "f6","memory");
 973}
 974
 975static inline void
 976mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 977{
 978        __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
 979                              :: "r"(init), "r"(final) : "f6","memory");
 980}
 981
 982static inline void
 983mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 984{
 985        __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
 986                              :: "r"(init), "r"(final) : "f6","memory");
 987}
 988
 989static inline void
 990float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 991{
 992        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
 993                              :: "r"(init), "r"(final) : "f6","memory");
 994}
 995
 996static inline void
 997float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 998{
 999        __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
1000                              :: "r"(init), "r"(final) : "f6","memory");
1001}
1002
/*
 * Refill f6 from the spill-format image at "init" (ldf.fill) and store it
 * to "final" as a single-precision memory operand (stfs).  f6 is used as
 * scratch and is declared clobbered, together with memory.
 */
static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ (
                "ldf.fill f6=[%0];; "
                "stfs [%1]=f6"
                : /* no outputs */
                : "r"(init), "r"(final)
                : "f6", "memory");
}
1009
/*
 * Refill f6 from the spill-format image at "init" (ldf.fill) and store it
 * to "final" as a double-precision memory operand (stfd).  f6 is used as
 * scratch and is declared clobbered, together with memory.
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ (
                "ldf.fill f6=[%0];; "
                "stfd [%1]=f6"
                : /* no outputs */
                : "r"(init), "r"(final)
                : "f6", "memory");
}
1016
1017static int
1018emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1019{
1020        struct ia64_fpreg fpr_init[2];
1021        struct ia64_fpreg fpr_final[2];
1022        unsigned long len = float_fsz[ld.x6_sz];
1023
1024        /*
1025         * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1026         * higher priority than unaligned faults.
1027         *
1028         * r0 cannot be found as the base as it would never generate an unaligned
1029         * reference.
1030         */
1031
1032        /*
1033         * make sure we get clean buffers
1034         */
1035        memset(&fpr_init, 0, sizeof(fpr_init));
1036        memset(&fpr_final, 0, sizeof(fpr_final));
1037
1038        /*
1039         * ldfpX.a: we don't try to emulate anything but we must
1040         * invalidate the ALAT entry and execute updates, if any.
1041         */
1042        if (ld.x6_op != 0x2) {
1043                /*
1044                 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
1045                 * instruction:
1046                 */
1047                if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1048                    || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1049                        return -1;
1050
1051                DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1052                DDUMP("frp_init =", &fpr_init, 2*len);
1053                /*
1054                 * XXX fixme
1055                 * Could optimize inlines by using ldfpX & 2 spills
1056                 */
1057                switch( ld.x6_sz ) {
1058                        case 0:
1059                                mem2float_extended(&fpr_init[0], &fpr_final[0]);
1060                                mem2float_extended(&fpr_init[1], &fpr_final[1]);
1061                                break;
1062                        case 1:
1063                                mem2float_integer(&fpr_init[0], &fpr_final[0]);
1064                                mem2float_integer(&fpr_init[1], &fpr_final[1]);
1065                                break;
1066                        case 2:
1067                                mem2float_single(&fpr_init[0], &fpr_final[0]);
1068                                mem2float_single(&fpr_init[1], &fpr_final[1]);
1069                                break;
1070                        case 3:
1071                                mem2float_double(&fpr_init[0], &fpr_final[0]);
1072                                mem2float_double(&fpr_init[1], &fpr_final[1]);
1073                                break;
1074                }
1075                DDUMP("fpr_final =", &fpr_final, 2*len);
1076                /*
1077                 * XXX fixme
1078                 *
1079                 * A possible optimization would be to drop fpr_final and directly
1080                 * use the storage from the saved context i.e., the actual final
1081                 * destination (pt_regs, switch_stack or thread structure).
1082                 */
1083                setfpreg(ld.r1, &fpr_final[0], regs);
1084                setfpreg(ld.imm, &fpr_final[1], regs);
1085        }
1086
1087        /*
1088         * Check for updates: only immediate updates are available for this
1089         * instruction.
1090         */
1091        if (ld.m) {
1092                /*
1093                 * the immediate is implicit given the ldsz of the operation:
1094                 * single: 8 (2x4) and for  all others it's 16 (2x8)
1095                 */
1096                ifa += len<<1;
1097
1098                /*
1099                 * IMPORTANT:
1100                 * the fact that we force the NaT of r3 to zero is ONLY valid
1101                 * as long as we don't come here with a ldfpX.s.
1102                 * For this reason we keep this sanity check
1103                 */
1104                if (ld.x6_op == 1 || ld.x6_op == 3)
1105                        printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1106                               __FUNCTION__);
1107
1108                setreg(ld.r3, ifa, 0, regs);
1109        }
1110
1111        /*
1112         * Invalidate ALAT entries, if any, for both registers.
1113         */
1114        if (ld.x6_op == 0x2) {
1115                invala_fr(ld.r1);
1116                invala_fr(ld.imm);
1117        }
1118        return 0;
1119}
1120
1121
1122static int
1123emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1124{
1125        struct ia64_fpreg fpr_init;
1126        struct ia64_fpreg fpr_final;
1127        unsigned long len = float_fsz[ld.x6_sz];
1128
1129        /*
1130         * fr0 & fr1 don't need to be checked because Illegal Instruction
1131         * faults have higher priority than unaligned faults.
1132         *
1133         * r0 cannot be found as the base as it would never generate an
1134         * unaligned reference.
1135         */
1136
1137        /*
1138         * make sure we get clean buffers
1139         */
1140        memset(&fpr_init,0, sizeof(fpr_init));
1141        memset(&fpr_final,0, sizeof(fpr_final));
1142
1143        /*
1144         * ldfX.a we don't try to emulate anything but we must
1145         * invalidate the ALAT entry.
1146         * See comments in ldX for descriptions on how the various loads are handled.
1147         */
1148        if (ld.x6_op != 0x2) {
1149                if (copy_from_user(&fpr_init, (void *) ifa, len))
1150                        return -1;
1151
1152                DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1153                DDUMP("fpr_init =", &fpr_init, len);
1154                /*
1155                 * we only do something for x6_op={0,8,9}
1156                 */
1157                switch( ld.x6_sz ) {
1158                        case 0:
1159                                mem2float_extended(&fpr_init, &fpr_final);
1160                                break;
1161                        case 1:
1162                                mem2float_integer(&fpr_init, &fpr_final);
1163                                break;
1164                        case 2:
1165                                mem2float_single(&fpr_init, &fpr_final);
1166                                break;
1167                        case 3:
1168                                mem2float_double(&fpr_init, &fpr_final);
1169                                break;
1170                }
1171                DDUMP("fpr_final =", &fpr_final, len);
1172                /*
1173                 * XXX fixme
1174                 *
1175                 * A possible optimization would be to drop fpr_final and directly
1176                 * use the storage from the saved context i.e., the actual final
1177                 * destination (pt_regs, switch_stack or thread structure).
1178                 */
1179                setfpreg(ld.r1, &fpr_final, regs);
1180        }
1181
1182        /*
1183         * check for updates on any loads
1184         */
1185        if (ld.op == 0x7 || ld.m)
1186                emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1187
1188        /*
1189         * invalidate ALAT entry in case of advanced floating point loads
1190         */
1191        if (ld.x6_op == 0x2)
1192                invala_fr(ld.r1);
1193
1194        return 0;
1195}
1196
1197
1198static int
1199emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1200{
1201        struct ia64_fpreg fpr_init;
1202        struct ia64_fpreg fpr_final;
1203        unsigned long len = float_fsz[ld.x6_sz];
1204
1205        /*
1206         * make sure we get clean buffers
1207         */
1208        memset(&fpr_init,0, sizeof(fpr_init));
1209        memset(&fpr_final,0, sizeof(fpr_final));
1210
1211        /*
1212         * if we get to this handler, Nat bits on both r3 and r2 have already
1213         * been checked. so we don't need to do it
1214         *
1215         * extract the value to be stored
1216         */
1217        getfpreg(ld.imm, &fpr_init, regs);
1218        /*
1219         * during this step, we extract the spilled registers from the saved
1220         * context i.e., we refill. Then we store (no spill) to temporary
1221         * aligned location
1222         */
1223        switch( ld.x6_sz ) {
1224                case 0:
1225                        float2mem_extended(&fpr_init, &fpr_final);
1226                        break;
1227                case 1:
1228                        float2mem_integer(&fpr_init, &fpr_final);
1229                        break;
1230                case 2:
1231                        float2mem_single(&fpr_init, &fpr_final);
1232                        break;
1233                case 3:
1234                        float2mem_double(&fpr_init, &fpr_final);
1235                        break;
1236        }
1237        DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1238        DDUMP("fpr_init =", &fpr_init, len);
1239        DDUMP("fpr_final =", &fpr_final, len);
1240
1241        if (copy_to_user((void *) ifa, &fpr_final, len))
1242                return -1;
1243
1244        /*
1245         * stfX [r3]=r2,imm(9)
1246         *
1247         * NOTE:
1248         * ld.r3 can never be r0, because r0 would not generate an
1249         * unaligned access.
1250         */
1251        if (ld.op == 0x7) {
1252                unsigned long imm;
1253
1254                /*
1255                 * form imm9: [12:6] contain first 7bits
1256                 */
1257                imm = ld.x << 7 | ld.r1;
1258                /*
1259                 * sign extend (8bits) if m set
1260                 */
1261                if (ld.m)
1262                        imm |= SIGN_EXT9;
1263                /*
1264                 * ifa == r3 (NaT is necessarily cleared)
1265                 */
1266                ifa += imm;
1267
1268                DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1269
1270                setreg(ld.r3, ifa, 0, regs);
1271        }
1272        /*
1273         * we don't have alat_invalidate_multiple() so we need
1274         * to do the complete flush :-<<
1275         */
1276        ia64_invala();
1277
1278        return 0;
1279}
1280
1281/*
1282 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1283 * eventually fix the program.  However, we don't want to do that for every access so we
1284 * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
1285 * either...
1286 */
1287static int
1288within_logging_rate_limit (void)
1289{
1290        static unsigned long count, last_time;
1291
1292        if (jiffies - last_time > 5*HZ)
1293                count = 0;
1294        if (++count < 5) {
1295                last_time = jiffies;
1296                return 1;
1297        }
1298        return 0;
1299
1300}
1301
1302void
1303ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1304{
1305        struct exception_fixup fix = { 0 };
1306        struct ia64_psr *ipsr = ia64_psr(regs);
1307        mm_segment_t old_fs = get_fs();
1308        unsigned long bundle[2];
1309        unsigned long opcode;
1310        struct siginfo si;
1311        union {
1312                unsigned long l;
1313                load_store_t insn;
1314        } u;
1315        int ret = -1;
1316
1317        if (ia64_psr(regs)->be) {
1318                /* we don't support big-endian accesses */
1319                die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1320                goto force_sigbus;
1321        }
1322
1323        /*
1324         * Treat kernel accesses for which there is an exception handler entry the same as
1325         * user-level unaligned accesses.  Otherwise, a clever program could trick this
1326         * handler into reading an arbitrary kernel addresses...
1327         */
1328        if (!user_mode(regs)) {
1329                fix = SEARCH_EXCEPTION_TABLE(regs);
1330        }
1331        if (user_mode(regs) || fix.cont) {
1332                if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1333                        goto force_sigbus;
1334
1335                if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1336                    && within_logging_rate_limit())
1337                {
1338                        char buf[200];  /* comm[] is at most 16 bytes... */
1339                        size_t len;
1340
1341                        len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1342                                      "ip=0x%016lx\n\r", current->comm, current->pid,
1343                                      ifa, regs->cr_iip + ipsr->ri);
1344                        /*
1345                         * Don't call tty_write_message() if we're in the kernel; we might
1346                         * be holding locks...
1347                         */
1348                        if (user_mode(regs))
1349                                tty_write_message(current->tty, buf);
1350                        buf[len-1] = '\0';      /* drop '\r' */
1351                        printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1352                }
1353        } else {
1354                if (within_logging_rate_limit())
1355                        printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1356                               ifa, regs->cr_iip + ipsr->ri);
1357                set_fs(KERNEL_DS);
1358        }
1359
1360        DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1361               regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1362
1363        if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1364                goto failure;
1365
1366        /*
1367         * extract the instruction from the bundle given the slot number
1368         */
1369        switch (ipsr->ri) {
1370              case 0: u.l = (bundle[0] >>  5); break;
1371              case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1372              case 2: u.l = (bundle[1] >> 23); break;
1373        }
1374        opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1375
1376        DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1377               "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1378               u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1379
1380        /*
1381         * IMPORTANT:
1382         * Notice that the switch statement DOES not cover all possible instructions
1383         * that DO generate unaligned references. This is made on purpose because for some
1384         * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1385         * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1386         * the program will get a signal and die:
1387         *
1388         *      load/store:
1389         *              - ldX.spill
1390         *              - stX.spill
1391         *      Reason: RNATs are based on addresses
1392         *
1393         *      synchronization:
1394         *              - cmpxchg
1395         *              - fetchadd
1396         *              - xchg
1397         *      Reason: ATOMIC operations cannot be emulated properly using multiple
1398         *              instructions.
1399         *
1400         *      speculative loads:
1401         *              - ldX.sZ
1402         *      Reason: side effects, code must be ready to deal with failure so simpler
1403         *              to let the load fail.
1404         * ---------------------------------------------------------------------------------
1405         * XXX fixme
1406         *
1407         * I would like to get rid of this switch case and do something
1408         * more elegant.
1409         */
1410        switch (opcode) {
1411              case LDS_OP:
1412              case LDSA_OP:
1413              case LDS_IMM_OP:
1414              case LDSA_IMM_OP:
1415              case LDFS_OP:
1416              case LDFSA_OP:
1417              case LDFS_IMM_OP:
1418                /*
1419                 * The instruction will be retried with deferred exceptions turned on, and
1420                 * we should get Nat bit installed
1421                 *
1422                 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1423                 * are actually executed even though the operation failed. So we don't
1424                 * need to take care of this.
1425                 */
1426                DPRINT("forcing PSR_ED\n");
1427                regs->cr_ipsr |= IA64_PSR_ED;
1428                goto done;
1429
1430              case LD_OP:
1431              case LDA_OP:
1432              case LDBIAS_OP:
1433              case LDACQ_OP:
1434              case LDCCLR_OP:
1435              case LDCNC_OP:
1436              case LDCCLRACQ_OP:
1437              case LD_IMM_OP:
1438              case LDA_IMM_OP:
1439              case LDBIAS_IMM_OP:
1440              case LDACQ_IMM_OP:
1441              case LDCCLR_IMM_OP:
1442              case LDCNC_IMM_OP:
1443              case LDCCLRACQ_IMM_OP:
1444                ret = emulate_load_int(ifa, u.insn, regs);
1445                break;
1446
1447              case ST_OP:
1448              case STREL_OP:
1449              case ST_IMM_OP:
1450              case STREL_IMM_OP:
1451                ret = emulate_store_int(ifa, u.insn, regs);
1452                break;
1453
1454              case LDF_OP:
1455              case LDFA_OP:
1456              case LDFCCLR_OP:
1457              case LDFCNC_OP:
1458              case LDF_IMM_OP:
1459              case LDFA_IMM_OP:
1460              case LDFCCLR_IMM_OP:
1461              case LDFCNC_IMM_OP:
1462                if (u.insn.x)
1463                        ret = emulate_load_floatpair(ifa, u.insn, regs);
1464                else
1465                        ret = emulate_load_float(ifa, u.insn, regs);
1466                break;
1467
1468              case STF_OP:
1469              case STF_IMM_OP:
1470                ret = emulate_store_float(ifa, u.insn, regs);
1471                break;
1472
1473              default:
1474                goto failure;
1475        }
1476        DPRINT("ret=%d\n", ret);
1477        if (ret)
1478                goto failure;
1479
1480        if (ipsr->ri == 2)
1481                /*
1482                 * given today's architecture this case is not likely to happen because a
1483                 * memory access instruction (M) can never be in the last slot of a
1484                 * bundle. But let's keep it for now.
1485                 */
1486                regs->cr_iip += 16;
1487        ipsr->ri = (ipsr->ri + 1) & 0x3;
1488
1489        DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1490  done:
1491        set_fs(old_fs);         /* restore original address limit */
1492        return;
1493
1494  failure:
1495        /* something went wrong... */
1496        if (!user_mode(regs)) {
1497                if (fix.cont) {
1498                        handle_exception(regs, fix);
1499                        goto done;
1500                }
1501                die_if_kernel("error during unaligned kernel access\n", regs, ret);
1502                /* NOT_REACHED */
1503        }
1504  force_sigbus:
1505        si.si_signo = SIGBUS;
1506        si.si_errno = 0;
1507        si.si_code = BUS_ADRALN;
1508        si.si_addr = (void *) ifa;
1509        si.si_flags = 0;
1510        si.si_isr = 0;
1511        si.si_imm = 0;
1512        force_sig_info(SIGBUS, &si, current);
1513        goto done;
1514}
1515
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.