linux/arch/sparc/kernel/visemul.c
<<
>>
Prefs
/* visemul.c: Emulation of VIS instructions.
 *
 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
 */
   5#include <linux/kernel.h>
   6#include <linux/errno.h>
   7#include <linux/thread_info.h>
   8
   9#include <asm/ptrace.h>
  10#include <asm/pstate.h>
  11#include <asm/system.h>
  12#include <asm/fpumacro.h>
  13#include <asm/uaccess.h>
  14
  15/* OPF field of various VIS instructions.  */
  16
  17/* 000111011 - four 16-bit packs  */
  18#define FPACK16_OPF     0x03b
  19
  20/* 000111010 - two 32-bit packs  */
  21#define FPACK32_OPF     0x03a
  22
  23/* 000111101 - four 16-bit packs  */
  24#define FPACKFIX_OPF    0x03d
  25
  26/* 001001101 - four 16-bit expands  */
  27#define FEXPAND_OPF     0x04d
  28
  29/* 001001011 - two 32-bit merges */
  30#define FPMERGE_OPF     0x04b
  31
  32/* 000110001 - 8-by-16-bit partitoned product  */
  33#define FMUL8x16_OPF    0x031
  34
  35/* 000110011 - 8-by-16-bit upper alpha partitioned product  */
  36#define FMUL8x16AU_OPF  0x033
  37
  38/* 000110101 - 8-by-16-bit lower alpha partitioned product  */
  39#define FMUL8x16AL_OPF  0x035
  40
  41/* 000110110 - upper 8-by-16-bit partitioned product  */
  42#define FMUL8SUx16_OPF  0x036
  43
  44/* 000110111 - lower 8-by-16-bit partitioned product  */
  45#define FMUL8ULx16_OPF  0x037
  46
  47/* 000111000 - upper 8-by-16-bit partitioned product  */
  48#define FMULD8SUx16_OPF 0x038
  49
  50/* 000111001 - lower unsigned 8-by-16-bit partitioned product  */
  51#define FMULD8ULx16_OPF 0x039
  52
  53/* 000101000 - four 16-bit compare; set rd if src1 > src2  */
  54#define FCMPGT16_OPF    0x028
  55
  56/* 000101100 - two 32-bit compare; set rd if src1 > src2  */
  57#define FCMPGT32_OPF    0x02c
  58
  59/* 000100000 - four 16-bit compare; set rd if src1 <= src2  */
  60#define FCMPLE16_OPF    0x020
  61
  62/* 000100100 - two 32-bit compare; set rd if src1 <= src2  */
  63#define FCMPLE32_OPF    0x024
  64
  65/* 000100010 - four 16-bit compare; set rd if src1 != src2  */
  66#define FCMPNE16_OPF    0x022
  67
  68/* 000100110 - two 32-bit compare; set rd if src1 != src2  */
  69#define FCMPNE32_OPF    0x026
  70
  71/* 000101010 - four 16-bit compare; set rd if src1 == src2  */
  72#define FCMPEQ16_OPF    0x02a
  73
  74/* 000101110 - two 32-bit compare; set rd if src1 == src2  */
  75#define FCMPEQ32_OPF    0x02e
  76
  77/* 000000000 - Eight 8-bit edge boundary processing  */
  78#define EDGE8_OPF       0x000
  79
  80/* 000000001 - Eight 8-bit edge boundary processing, no CC */
  81#define EDGE8N_OPF      0x001
  82
  83/* 000000010 - Eight 8-bit edge boundary processing, little-endian  */
  84#define EDGE8L_OPF      0x002
  85
  86/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC  */
  87#define EDGE8LN_OPF     0x003
  88
  89/* 000000100 - Four 16-bit edge boundary processing  */
  90#define EDGE16_OPF      0x004
  91
  92/* 000000101 - Four 16-bit edge boundary processing, no CC  */
  93#define EDGE16N_OPF     0x005
  94
  95/* 000000110 - Four 16-bit edge boundary processing, little-endian  */
  96#define EDGE16L_OPF     0x006
  97
  98/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC  */
  99#define EDGE16LN_OPF    0x007
 100
 101/* 000001000 - Two 32-bit edge boundary processing  */
 102#define EDGE32_OPF      0x008
 103
 104/* 000001001 - Two 32-bit edge boundary processing, no CC  */
 105#define EDGE32N_OPF     0x009
 106
 107/* 000001010 - Two 32-bit edge boundary processing, little-endian  */
 108#define EDGE32L_OPF     0x00a
 109
 110/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC  */
 111#define EDGE32LN_OPF    0x00b
 112
 113/* 000111110 - distance between 8 8-bit components  */
 114#define PDIST_OPF       0x03e
 115
 116/* 000010000 - convert 8-bit 3-D address to blocked byte address  */
 117#define ARRAY8_OPF      0x010
 118
 119/* 000010010 - convert 16-bit 3-D address to blocked byte address  */
 120#define ARRAY16_OPF     0x012
 121
 122/* 000010100 - convert 32-bit 3-D address to blocked byte address  */
 123#define ARRAY32_OPF     0x014
 124
 125/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE  */
 126#define BMASK_OPF       0x019
 127
 128/* 001001100 - Permute bytes as specified by GSR.MASK  */
 129#define BSHUFFLE_OPF    0x04c
 130
 131#define VIS_OPF_SHIFT   5
 132#define VIS_OPF_MASK    (0x1ff << VIS_OPF_SHIFT)
 133
 134#define RS1(INSN)       (((INSN) >> 14) & 0x1f)
 135#define RS2(INSN)       (((INSN) >>  0) & 0x1f)
 136#define RD(INSN)        (((INSN) >> 25) & 0x1f)
 137
 138static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 139                                       unsigned int rd, int from_kernel)
 140{
 141        if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
 142                if (from_kernel != 0)
 143                        __asm__ __volatile__("flushw");
 144                else
 145                        flushw_user();
 146        }
 147}
 148
 149static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 150{
 151        unsigned long value;
 152        
 153        if (reg < 16)
 154                return (!reg ? 0 : regs->u_regs[reg]);
 155        if (regs->tstate & TSTATE_PRIV) {
 156                struct reg_window *win;
 157                win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
 158                value = win->locals[reg - 16];
 159        } else if (test_thread_flag(TIF_32BIT)) {
 160                struct reg_window32 __user *win32;
 161                win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
 162                get_user(value, &win32->locals[reg - 16]);
 163        } else {
 164                struct reg_window __user *win;
 165                win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
 166                get_user(value, &win->locals[reg - 16]);
 167        }
 168        return value;
 169}
 170
 171static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
 172                                                          struct pt_regs *regs)
 173{
 174        BUG_ON(reg < 16);
 175        BUG_ON(regs->tstate & TSTATE_PRIV);
 176
 177        if (test_thread_flag(TIF_32BIT)) {
 178                struct reg_window32 __user *win32;
 179                win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
 180                return (unsigned long __user *)&win32->locals[reg - 16];
 181        } else {
 182                struct reg_window __user *win;
 183                win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
 184                return &win->locals[reg - 16];
 185        }
 186}
 187
 188static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
 189                                                   struct pt_regs *regs)
 190{
 191        BUG_ON(reg >= 16);
 192        BUG_ON(regs->tstate & TSTATE_PRIV);
 193
 194        return &regs->u_regs[reg];
 195}
 196
 197static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
 198{
 199        if (rd < 16) {
 200                unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
 201
 202                *rd_kern = val;
 203        } else {
 204                unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
 205
 206                if (test_thread_flag(TIF_32BIT))
 207                        __put_user((u32)val, (u32 __user *)rd_user);
 208                else
 209                        __put_user(val, rd_user);
 210        }
 211}
 212
 213static inline unsigned long fpd_regval(struct fpustate *f,
 214                                       unsigned int insn_regnum)
 215{
 216        insn_regnum = (((insn_regnum & 1) << 5) |
 217                       (insn_regnum & 0x1e));
 218
 219        return *(unsigned long *) &f->regs[insn_regnum];
 220}
 221
 222static inline unsigned long *fpd_regaddr(struct fpustate *f,
 223                                         unsigned int insn_regnum)
 224{
 225        insn_regnum = (((insn_regnum & 1) << 5) |
 226                       (insn_regnum & 0x1e));
 227
 228        return (unsigned long *) &f->regs[insn_regnum];
 229}
 230
 231static inline unsigned int fps_regval(struct fpustate *f,
 232                                      unsigned int insn_regnum)
 233{
 234        return f->regs[insn_regnum];
 235}
 236
 237static inline unsigned int *fps_regaddr(struct fpustate *f,
 238                                        unsigned int insn_regnum)
 239{
 240        return &f->regs[insn_regnum];
 241}
 242
 243struct edge_tab {
 244        u16 left, right;
 245};
 246static struct edge_tab edge8_tab[8] = {
 247        { 0xff, 0x80 },
 248        { 0x7f, 0xc0 },
 249        { 0x3f, 0xe0 },
 250        { 0x1f, 0xf0 },
 251        { 0x0f, 0xf8 },
 252        { 0x07, 0xfc },
 253        { 0x03, 0xfe },
 254        { 0x01, 0xff },
 255};
 256static struct edge_tab edge8_tab_l[8] = {
 257        { 0xff, 0x01 },
 258        { 0xfe, 0x03 },
 259        { 0xfc, 0x07 },
 260        { 0xf8, 0x0f },
 261        { 0xf0, 0x1f },
 262        { 0xe0, 0x3f },
 263        { 0xc0, 0x7f },
 264        { 0x80, 0xff },
 265};
 266static struct edge_tab edge16_tab[4] = {
 267        { 0xf, 0x8 },
 268        { 0x7, 0xc },
 269        { 0x3, 0xe },
 270        { 0x1, 0xf },
 271};
 272static struct edge_tab edge16_tab_l[4] = {
 273        { 0xf, 0x1 },
 274        { 0xe, 0x3 },
 275        { 0xc, 0x7 },
 276        { 0x8, 0xf },
 277};
 278static struct edge_tab edge32_tab[2] = {
 279        { 0x3, 0x2 },
 280        { 0x1, 0x3 },
 281};
 282static struct edge_tab edge32_tab_l[2] = {
 283        { 0x3, 0x1 },
 284        { 0x2, 0x3 },
 285};
 286
 287static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 288{
 289        unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
 290        u16 left, right;
 291
 292        maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
 293        orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
 294        orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
 295
 296        if (test_thread_flag(TIF_32BIT)) {
 297                rs1 = rs1 & 0xffffffff;
 298                rs2 = rs2 & 0xffffffff;
 299        }
 300        switch (opf) {
 301        default:
 302        case EDGE8_OPF:
 303        case EDGE8N_OPF:
 304                left = edge8_tab[rs1 & 0x7].left;
 305                right = edge8_tab[rs2 & 0x7].right;
 306                break;
 307        case EDGE8L_OPF:
 308        case EDGE8LN_OPF:
 309                left = edge8_tab_l[rs1 & 0x7].left;
 310                right = edge8_tab_l[rs2 & 0x7].right;
 311                break;
 312
 313        case EDGE16_OPF:
 314        case EDGE16N_OPF:
 315                left = edge16_tab[(rs1 >> 1) & 0x3].left;
 316                right = edge16_tab[(rs2 >> 1) & 0x3].right;
 317                break;
 318
 319        case EDGE16L_OPF:
 320        case EDGE16LN_OPF:
 321                left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
 322                right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
 323                break;
 324
 325        case EDGE32_OPF:
 326        case EDGE32N_OPF:
 327                left = edge32_tab[(rs1 >> 2) & 0x1].left;
 328                right = edge32_tab[(rs2 >> 2) & 0x1].right;
 329                break;
 330
 331        case EDGE32L_OPF:
 332        case EDGE32LN_OPF:
 333                left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
 334                right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
 335                break;
 336        };
 337
 338        if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
 339                rd_val = right & left;
 340        else
 341                rd_val = left;
 342
 343        store_reg(regs, rd_val, RD(insn));
 344
 345        switch (opf) {
 346        case EDGE8_OPF:
 347        case EDGE8L_OPF:
 348        case EDGE16_OPF:
 349        case EDGE16L_OPF:
 350        case EDGE32_OPF:
 351        case EDGE32L_OPF: {
 352                unsigned long ccr, tstate;
 353
 354                __asm__ __volatile__("subcc     %1, %2, %%g0\n\t"
 355                                     "rd        %%ccr, %0"
 356                                     : "=r" (ccr)
 357                                     : "r" (orig_rs1), "r" (orig_rs2)
 358                                     : "cc");
 359                tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
 360                regs->tstate = tstate | (ccr << 32UL);
 361        }
 362        };
 363}
 364
 365static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 366{
 367        unsigned long rs1, rs2, rd_val;
 368        unsigned int bits, bits_mask;
 369
 370        maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
 371        rs1 = fetch_reg(RS1(insn), regs);
 372        rs2 = fetch_reg(RS2(insn), regs);
 373
 374        bits = (rs2 > 5 ? 5 : rs2);
 375        bits_mask = (1UL << bits) - 1UL;
 376
 377        rd_val = ((((rs1 >> 11) & 0x3) <<  0) |
 378                  (((rs1 >> 33) & 0x3) <<  2) |
 379                  (((rs1 >> 55) & 0x1) <<  4) |
 380                  (((rs1 >> 13) & 0xf) <<  5) |
 381                  (((rs1 >> 35) & 0xf) <<  9) |
 382                  (((rs1 >> 56) & 0xf) << 13) |
 383                  (((rs1 >> 17) & bits_mask) << 17) |
 384                  (((rs1 >> 39) & bits_mask) << (17 + bits)) |
 385                  (((rs1 >> 60) & 0xf)       << (17 + (2*bits))));
 386
 387        switch (opf) {
 388        case ARRAY16_OPF:
 389                rd_val <<= 1;
 390                break;
 391
 392        case ARRAY32_OPF:
 393                rd_val <<= 2;
 394        };
 395
 396        store_reg(regs, rd_val, RD(insn));
 397}
 398
 399static void bmask(struct pt_regs *regs, unsigned int insn)
 400{
 401        unsigned long rs1, rs2, rd_val, gsr;
 402
 403        maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
 404        rs1 = fetch_reg(RS1(insn), regs);
 405        rs2 = fetch_reg(RS2(insn), regs);
 406        rd_val = rs1 + rs2;
 407
 408        store_reg(regs, rd_val, RD(insn));
 409
 410        gsr = current_thread_info()->gsr[0] & 0xffffffff;
 411        gsr |= rd_val << 32UL;
 412        current_thread_info()->gsr[0] = gsr;
 413}
 414
 415static void bshuffle(struct pt_regs *regs, unsigned int insn)
 416{
 417        struct fpustate *f = FPUSTATE;
 418        unsigned long rs1, rs2, rd_val;
 419        unsigned long bmask, i;
 420
 421        bmask = current_thread_info()->gsr[0] >> 32UL;
 422
 423        rs1 = fpd_regval(f, RS1(insn));
 424        rs2 = fpd_regval(f, RS2(insn));
 425
 426        rd_val = 0UL;
 427        for (i = 0; i < 8; i++) {
 428                unsigned long which = (bmask >> (i * 4)) & 0xf;
 429                unsigned long byte;
 430
 431                if (which < 8)
 432                        byte = (rs1 >> (which * 8)) & 0xff;
 433                else
 434                        byte = (rs2 >> ((which-8)*8)) & 0xff;
 435                rd_val |= (byte << (i * 8));
 436        }
 437
 438        *fpd_regaddr(f, RD(insn)) = rd_val;
 439}
 440
 441static void pdist(struct pt_regs *regs, unsigned int insn)
 442{
 443        struct fpustate *f = FPUSTATE;
 444        unsigned long rs1, rs2, *rd, rd_val;
 445        unsigned long i;
 446
 447        rs1 = fpd_regval(f, RS1(insn));
 448        rs2 = fpd_regval(f, RS2(insn));
 449        rd = fpd_regaddr(f, RD(insn));
 450
 451        rd_val = *rd;
 452
 453        for (i = 0; i < 8; i++) {
 454                s16 s1, s2;
 455
 456                s1 = (rs1 >> (56 - (i * 8))) & 0xff;
 457                s2 = (rs2 >> (56 - (i * 8))) & 0xff;
 458
 459                /* Absolute value of difference. */
 460                s1 -= s2;
 461                if (s1 < 0)
 462                        s1 = ~s1 + 1;
 463
 464                rd_val += s1;
 465        }
 466
 467        *rd = rd_val;
 468}
 469
 470static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 471{
 472        struct fpustate *f = FPUSTATE;
 473        unsigned long rs1, rs2, gsr, scale, rd_val;
 474
 475        gsr = current_thread_info()->gsr[0];
 476        scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
 477        switch (opf) {
 478        case FPACK16_OPF: {
 479                unsigned long byte;
 480
 481                rs2 = fpd_regval(f, RS2(insn));
 482                rd_val = 0;
 483                for (byte = 0; byte < 4; byte++) {
 484                        unsigned int val;
 485                        s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
 486                        int scaled = src << scale;
 487                        int from_fixed = scaled >> 7;
 488
 489                        val = ((from_fixed < 0) ?
 490                               0 :
 491                               (from_fixed > 255) ?
 492                               255 : from_fixed);
 493
 494                        rd_val |= (val << (8 * byte));
 495                }
 496                *fps_regaddr(f, RD(insn)) = rd_val;
 497                break;
 498        }
 499
 500        case FPACK32_OPF: {
 501                unsigned long word;
 502
 503                rs1 = fpd_regval(f, RS1(insn));
 504                rs2 = fpd_regval(f, RS2(insn));
 505                rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
 506                for (word = 0; word < 2; word++) {
 507                        unsigned long val;
 508                        s32 src = (rs2 >> (word * 32UL));
 509                        s64 scaled = src << scale;
 510                        s64 from_fixed = scaled >> 23;
 511
 512                        val = ((from_fixed < 0) ?
 513                               0 :
 514                               (from_fixed > 255) ?
 515                               255 : from_fixed);
 516
 517                        rd_val |= (val << (32 * word));
 518                }
 519                *fpd_regaddr(f, RD(insn)) = rd_val;
 520                break;
 521        }
 522
 523        case FPACKFIX_OPF: {
 524                unsigned long word;
 525
 526                rs2 = fpd_regval(f, RS2(insn));
 527
 528                rd_val = 0;
 529                for (word = 0; word < 2; word++) {
 530                        long val;
 531                        s32 src = (rs2 >> (word * 32UL));
 532                        s64 scaled = src << scale;
 533                        s64 from_fixed = scaled >> 16;
 534
 535                        val = ((from_fixed < -32768) ?
 536                               -32768 :
 537                               (from_fixed > 32767) ?
 538                               32767 : from_fixed);
 539
 540                        rd_val |= ((val & 0xffff) << (word * 16));
 541                }
 542                *fps_regaddr(f, RD(insn)) = rd_val;
 543                break;
 544        }
 545
 546        case FEXPAND_OPF: {
 547                unsigned long byte;
 548
 549                rs2 = fps_regval(f, RS2(insn));
 550
 551                rd_val = 0;
 552                for (byte = 0; byte < 4; byte++) {
 553                        unsigned long val;
 554                        u8 src = (rs2 >> (byte * 8)) & 0xff;
 555
 556                        val = src << 4;
 557
 558                        rd_val |= (val << (byte * 16));
 559                }
 560                *fpd_regaddr(f, RD(insn)) = rd_val;
 561                break;
 562        }
 563
 564        case FPMERGE_OPF: {
 565                rs1 = fps_regval(f, RS1(insn));
 566                rs2 = fps_regval(f, RS2(insn));
 567
 568                rd_val = (((rs2 & 0x000000ff) <<  0) |
 569                          ((rs1 & 0x000000ff) <<  8) |
 570                          ((rs2 & 0x0000ff00) <<  8) |
 571                          ((rs1 & 0x0000ff00) << 16) |
 572                          ((rs2 & 0x00ff0000) << 16) |
 573                          ((rs1 & 0x00ff0000) << 24) |
 574                          ((rs2 & 0xff000000) << 24) |
 575                          ((rs1 & 0xff000000) << 32));
 576                *fpd_regaddr(f, RD(insn)) = rd_val;
 577                break;
 578        }
 579        };
 580}
 581
 582static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 583{
 584        struct fpustate *f = FPUSTATE;
 585        unsigned long rs1, rs2, rd_val;
 586
 587        switch (opf) {
 588        case FMUL8x16_OPF: {
 589                unsigned long byte;
 590
 591                rs1 = fps_regval(f, RS1(insn));
 592                rs2 = fpd_regval(f, RS2(insn));
 593
 594                rd_val = 0;
 595                for (byte = 0; byte < 4; byte++) {
 596                        u16 src1 = (rs1 >> (byte *  8)) & 0x00ff;
 597                        s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
 598                        u32 prod = src1 * src2;
 599                        u16 scaled = ((prod & 0x00ffff00) >> 8);
 600
 601                        /* Round up.  */
 602                        if (prod & 0x80)
 603                                scaled++;
 604                        rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
 605                }
 606
 607                *fpd_regaddr(f, RD(insn)) = rd_val;
 608                break;
 609        }
 610
 611        case FMUL8x16AU_OPF:
 612        case FMUL8x16AL_OPF: {
 613                unsigned long byte;
 614                s16 src2;
 615
 616                rs1 = fps_regval(f, RS1(insn));
 617                rs2 = fps_regval(f, RS2(insn));
 618
 619                rd_val = 0;
 620                src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
 621                for (byte = 0; byte < 4; byte++) {
 622                        u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
 623                        u32 prod = src1 * src2;
 624                        u16 scaled = ((prod & 0x00ffff00) >> 8);
 625
 626                        /* Round up.  */
 627                        if (prod & 0x80)
 628                                scaled++;
 629                        rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
 630                }
 631
 632                *fpd_regaddr(f, RD(insn)) = rd_val;
 633                break;
 634        }
 635
 636        case FMUL8SUx16_OPF:
 637        case FMUL8ULx16_OPF: {
 638                unsigned long byte, ushift;
 639
 640                rs1 = fpd_regval(f, RS1(insn));
 641                rs2 = fpd_regval(f, RS2(insn));
 642
 643                rd_val = 0;
 644                ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
 645                for (byte = 0; byte < 4; byte++) {
 646                        u16 src1;
 647                        s16 src2;
 648                        u32 prod;
 649                        u16 scaled;
 650
 651                        src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
 652                        src2 = ((rs2 >> (16 * byte)) & 0xffff);
 653                        prod = src1 * src2;
 654                        scaled = ((prod & 0x00ffff00) >> 8);
 655
 656                        /* Round up.  */
 657                        if (prod & 0x80)
 658                                scaled++;
 659                        rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
 660                }
 661
 662                *fpd_regaddr(f, RD(insn)) = rd_val;
 663                break;
 664        }
 665
 666        case FMULD8SUx16_OPF:
 667        case FMULD8ULx16_OPF: {
 668                unsigned long byte, ushift;
 669
 670                rs1 = fps_regval(f, RS1(insn));
 671                rs2 = fps_regval(f, RS2(insn));
 672
 673                rd_val = 0;
 674                ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
 675                for (byte = 0; byte < 2; byte++) {
 676                        u16 src1;
 677                        s16 src2;
 678                        u32 prod;
 679                        u16 scaled;
 680
 681                        src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
 682                        src2 = ((rs2 >> (16 * byte)) & 0xffff);
 683                        prod = src1 * src2;
 684                        scaled = ((prod & 0x00ffff00) >> 8);
 685
 686                        /* Round up.  */
 687                        if (prod & 0x80)
 688                                scaled++;
 689                        rd_val |= ((scaled & 0xffffUL) <<
 690                                   ((byte * 32UL) + 7UL));
 691                }
 692                *fpd_regaddr(f, RD(insn)) = rd_val;
 693                break;
 694        }
 695        };
 696}
 697
 698static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 699{
 700        struct fpustate *f = FPUSTATE;
 701        unsigned long rs1, rs2, rd_val, i;
 702
 703        rs1 = fpd_regval(f, RS1(insn));
 704        rs2 = fpd_regval(f, RS2(insn));
 705
 706        rd_val = 0;
 707
 708        switch (opf) {
 709        case FCMPGT16_OPF:
 710                for (i = 0; i < 4; i++) {
 711                        s16 a = (rs1 >> (i * 16)) & 0xffff;
 712                        s16 b = (rs2 >> (i * 16)) & 0xffff;
 713
 714                        if (a > b)
 715                                rd_val |= 1 << i;
 716                }
 717                break;
 718
 719        case FCMPGT32_OPF:
 720                for (i = 0; i < 2; i++) {
 721                        s32 a = (rs1 >> (i * 32)) & 0xffff;
 722                        s32 b = (rs2 >> (i * 32)) & 0xffff;
 723
 724                        if (a > b)
 725                                rd_val |= 1 << i;
 726                }
 727                break;
 728
 729        case FCMPLE16_OPF:
 730                for (i = 0; i < 4; i++) {
 731                        s16 a = (rs1 >> (i * 16)) & 0xffff;
 732                        s16 b = (rs2 >> (i * 16)) & 0xffff;
 733
 734                        if (a <= b)
 735                                rd_val |= 1 << i;
 736                }
 737                break;
 738
 739        case FCMPLE32_OPF:
 740                for (i = 0; i < 2; i++) {
 741                        s32 a = (rs1 >> (i * 32)) & 0xffff;
 742                        s32 b = (rs2 >> (i * 32)) & 0xffff;
 743
 744                        if (a <= b)
 745                                rd_val |= 1 << i;
 746                }
 747                break;
 748
 749        case FCMPNE16_OPF:
 750                for (i = 0; i < 4; i++) {
 751                        s16 a = (rs1 >> (i * 16)) & 0xffff;
 752                        s16 b = (rs2 >> (i * 16)) & 0xffff;
 753
 754                        if (a != b)
 755                                rd_val |= 1 << i;
 756                }
 757                break;
 758
 759        case FCMPNE32_OPF:
 760                for (i = 0; i < 2; i++) {
 761                        s32 a = (rs1 >> (i * 32)) & 0xffff;
 762                        s32 b = (rs2 >> (i * 32)) & 0xffff;
 763
 764                        if (a != b)
 765                                rd_val |= 1 << i;
 766                }
 767                break;
 768
 769        case FCMPEQ16_OPF:
 770                for (i = 0; i < 4; i++) {
 771                        s16 a = (rs1 >> (i * 16)) & 0xffff;
 772                        s16 b = (rs2 >> (i * 16)) & 0xffff;
 773
 774                        if (a == b)
 775                                rd_val |= 1 << i;
 776                }
 777                break;
 778
 779        case FCMPEQ32_OPF:
 780                for (i = 0; i < 2; i++) {
 781                        s32 a = (rs1 >> (i * 32)) & 0xffff;
 782                        s32 b = (rs2 >> (i * 32)) & 0xffff;
 783
 784                        if (a == b)
 785                                rd_val |= 1 << i;
 786                }
 787                break;
 788        };
 789
 790        maybe_flush_windows(0, 0, RD(insn), 0);
 791        store_reg(regs, rd_val, RD(insn));
 792}
 793
 794/* Emulate the VIS instructions which are not implemented in
 795 * hardware on Niagara.
 796 */
 797int vis_emul(struct pt_regs *regs, unsigned int insn)
 798{
 799        unsigned long pc = regs->tpc;
 800        unsigned int opf;
 801
 802        BUG_ON(regs->tstate & TSTATE_PRIV);
 803
 804        if (test_thread_flag(TIF_32BIT))
 805                pc = (u32)pc;
 806
 807        if (get_user(insn, (u32 __user *) pc))
 808                return -EFAULT;
 809
 810        save_and_clear_fpu();
 811
 812        opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
 813        switch (opf) {
 814        default:
 815                return -EINVAL;
 816
 817        /* Pixel Formatting Instructions.  */
 818        case FPACK16_OPF:
 819        case FPACK32_OPF:
 820        case FPACKFIX_OPF:
 821        case FEXPAND_OPF:
 822        case FPMERGE_OPF:
 823                pformat(regs, insn, opf);
 824                break;
 825
 826        /* Partitioned Multiply Instructions  */
 827        case FMUL8x16_OPF:
 828        case FMUL8x16AU_OPF:
 829        case FMUL8x16AL_OPF:
 830        case FMUL8SUx16_OPF:
 831        case FMUL8ULx16_OPF:
 832        case FMULD8SUx16_OPF:
 833        case FMULD8ULx16_OPF:
 834                pmul(regs, insn, opf);
 835                break;
 836
 837        /* Pixel Compare Instructions  */
 838        case FCMPGT16_OPF:
 839        case FCMPGT32_OPF:
 840        case FCMPLE16_OPF:
 841        case FCMPLE32_OPF:
 842        case FCMPNE16_OPF:
 843        case FCMPNE32_OPF:
 844        case FCMPEQ16_OPF:
 845        case FCMPEQ32_OPF:
 846                pcmp(regs, insn, opf);
 847                break;
 848
 849        /* Edge Handling Instructions  */
 850        case EDGE8_OPF:
 851        case EDGE8N_OPF:
 852        case EDGE8L_OPF:
 853        case EDGE8LN_OPF:
 854        case EDGE16_OPF:
 855        case EDGE16N_OPF:
 856        case EDGE16L_OPF:
 857        case EDGE16LN_OPF:
 858        case EDGE32_OPF:
 859        case EDGE32N_OPF:
 860        case EDGE32L_OPF:
 861        case EDGE32LN_OPF:
 862                edge(regs, insn, opf);
 863                break;
 864
 865        /* Pixel Component Distance  */
 866        case PDIST_OPF:
 867                pdist(regs, insn);
 868                break;
 869
 870        /* Three-Dimensional Array Addressing Instructions  */
 871        case ARRAY8_OPF:
 872        case ARRAY16_OPF:
 873        case ARRAY32_OPF:
 874                array(regs, insn, opf);
 875                break;
 876
 877        /* Byte Mask and Shuffle Instructions  */
 878        case BMASK_OPF:
 879                bmask(regs, insn);
 880                break;
 881
 882        case BSHUFFLE_OPF:
 883                bshuffle(regs, insn);
 884                break;
 885        };
 886
 887        regs->tpc = regs->tnpc;
 888        regs->tnpc += 4;
 889        return 0;
 890}
 891
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.