/* linux/lib/mpi/longlong.h */
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
 * Note: I added some stuff for use with gnupg
 *
 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
 *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this file; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA. */

/* You have to define the following before including this file:
 *
 * UWtype -- An unsigned type, default type for operations (typically a "word")
 * UHWtype -- An unsigned type, at least half the size of UWtype.
 * UDWtype -- An unsigned type, at least twice as large as a UWtype
 * W_TYPE_SIZE -- size in bits of UWtype
 *
 * SItype, USItype -- Signed and unsigned 32 bit types.
 * DItype, UDItype -- Signed and unsigned 64 bit types.
 *
 * On a 32 bit machine UWtype should typically be USItype;
 * on a 64 bit machine, UWtype should typically be UDItype.
 */
  35
  36#define __BITS4 (W_TYPE_SIZE / 4)
  37#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
  38#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
  39#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  40
  41/* This is used to make sure no undesirable sharing between different libraries
  42        that use this file takes place.  */
  43#ifndef __MPN
  44#define __MPN(x) __##x
  45#endif
  46
/* Define auxiliary asm macros.
 *
 * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
 * word product in HIGH_PROD and LOW_PROD.
 *
 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
 * UDWtype product.  This is just a variant of umul_ppmm.
 *
 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator) divides a UDWtype, composed by the UWtype integers
 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
 * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
 * than DENOMINATOR for correct operation.  If, in addition, the most
 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
 * UDIV_NEEDS_NORMALIZATION is defined to 1.
 *
 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
 * is rounded towards 0.
 *
 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
 * msb to the first non-zero bit in the UWtype X.  This is the number of
 * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
 *
 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
 * from the least significant end.
 *
 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 * high_addend_2, low_addend_2) adds two UWtype integers, composed by
 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
 * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
 * (i.e. carry out) is not stored anywhere, and is lost.
 *
 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
 * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
 * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 * and is lost.
 *
 * If any of these macros are left undefined for a particular CPU,
 * C macros are used.  */

/* The CPUs come in alphabetical order below.
 *
 * Please add support for more CPUs here, or improve the current support
 * for the CPUs below!  */
  95
  96#if defined(__GNUC__) && !defined(NO_ASM)
  97
  98/* We sometimes need to clobber "cc" with gcc2, but that would not be
  99        understood by gcc1.     Use cpp to avoid major code duplication.  */
 100#if __GNUC__ < 2
 101#define __CLOBBER_CC
 102#define __AND_CLOBBER_CC
 103#else /* __GNUC__ >= 2 */
 104#define __CLOBBER_CC : "cc"
 105#define __AND_CLOBBER_CC , "cc"
 106#endif /* __GNUC__ < 2 */
 107
 108/***************************************
 109        **************  A29K  *****************
 110        ***************************************/
 111#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
 112#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 113        __asm__ ("add %1,%4,%5\n" \
 114                "addc %0,%2,%3" \
 115        : "=r" ((USItype)(sh)), \
 116                "=&r" ((USItype)(sl)) \
 117        : "%r" ((USItype)(ah)), \
 118                "rI" ((USItype)(bh)), \
 119                "%r" ((USItype)(al)), \
 120                "rI" ((USItype)(bl)))
 121#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 122        __asm__ ("sub %1,%4,%5\n" \
 123                "subc %0,%2,%3" \
 124        : "=r" ((USItype)(sh)), \
 125                "=&r" ((USItype)(sl)) \
 126        : "r" ((USItype)(ah)), \
 127                "rI" ((USItype)(bh)), \
 128                "r" ((USItype)(al)), \
 129                "rI" ((USItype)(bl)))
 130#define umul_ppmm(xh, xl, m0, m1) \
 131do { \
 132                USItype __m0 = (m0), __m1 = (m1); \
 133                __asm__ ("multiplu %0,%1,%2" \
 134                : "=r" ((USItype)(xl)) \
 135                : "r" (__m0), \
 136                        "r" (__m1)); \
 137                __asm__ ("multmu %0,%1,%2" \
 138                : "=r" ((USItype)(xh)) \
 139                : "r" (__m0), \
 140                        "r" (__m1)); \
 141} while (0)
 142#define udiv_qrnnd(q, r, n1, n0, d) \
 143        __asm__ ("dividu %0,%3,%4" \
 144        : "=r" ((USItype)(q)), \
 145                "=q" ((USItype)(r)) \
 146        : "1" ((USItype)(n1)), \
 147                "r" ((USItype)(n0)), \
 148                "r" ((USItype)(d)))
 149
 150#define count_leading_zeros(count, x) \
 151        __asm__ ("clz %0,%1" \
 152        : "=r" ((USItype)(count)) \
 153        : "r" ((USItype)(x)))
 154#define COUNT_LEADING_ZEROS_0 32
 155#endif /* __a29k__ */
 156
 157#if defined(__alpha) && W_TYPE_SIZE == 64
 158#define umul_ppmm(ph, pl, m0, m1) \
 159do { \
 160                UDItype __m0 = (m0), __m1 = (m1); \
 161                __asm__ ("umulh %r1,%2,%0" \
 162                : "=r" ((UDItype) ph) \
 163                : "%rJ" (__m0), \
 164                        "rI" (__m1)); \
 165                (pl) = __m0 * __m1; \
 166        } while (0)
 167#define UMUL_TIME 46
 168#ifndef LONGLONG_STANDALONE
 169#define udiv_qrnnd(q, r, n1, n0, d) \
 170do { UDItype __r; \
 171        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
 172        (r) = __r; \
 173} while (0)
 174extern UDItype __udiv_qrnnd();
 175#define UDIV_TIME 220
 176#endif /* LONGLONG_STANDALONE */
 177#endif /* __alpha */
 178
 179/***************************************
 180        **************  ARM  ******************
 181        ***************************************/
 182#if defined(__arm__) && W_TYPE_SIZE == 32
 183#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 184        __asm__ ("adds %1, %4, %5\n" \
 185                "adc  %0, %2, %3" \
 186        : "=r" ((USItype)(sh)), \
 187                "=&r" ((USItype)(sl)) \
 188        : "%r" ((USItype)(ah)), \
 189                "rI" ((USItype)(bh)), \
 190                "%r" ((USItype)(al)), \
 191                "rI" ((USItype)(bl)))
 192#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 193        __asm__ ("subs %1, %4, %5\n" \
 194                "sbc  %0, %2, %3" \
 195        : "=r" ((USItype)(sh)), \
 196                "=&r" ((USItype)(sl)) \
 197        : "r" ((USItype)(ah)), \
 198                "rI" ((USItype)(bh)), \
 199                "r" ((USItype)(al)), \
 200                "rI" ((USItype)(bl)))
 201#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
 202#define umul_ppmm(xh, xl, a, b) \
 203        __asm__ ("%@ Inlined umul_ppmm\n" \
 204                "mov    %|r0, %2, lsr #16               @ AAAA\n" \
 205                "mov    %|r2, %3, lsr #16               @ BBBB\n" \
 206                "bic    %|r1, %2, %|r0, lsl #16         @ aaaa\n" \
 207                "bic    %0, %3, %|r2, lsl #16           @ bbbb\n" \
 208                "mul    %1, %|r1, %|r2                  @ aaaa * BBBB\n" \
 209                "mul    %|r2, %|r0, %|r2                @ AAAA * BBBB\n" \
 210                "mul    %|r1, %0, %|r1                  @ aaaa * bbbb\n" \
 211                "mul    %0, %|r0, %0                    @ AAAA * bbbb\n" \
 212                "adds   %|r0, %1, %0                    @ central sum\n" \
 213                "addcs  %|r2, %|r2, #65536\n" \
 214                "adds   %1, %|r1, %|r0, lsl #16\n" \
 215                "adc    %0, %|r2, %|r0, lsr #16" \
 216        : "=&r" ((USItype)(xh)), \
 217                "=r" ((USItype)(xl)) \
 218        : "r" ((USItype)(a)), \
 219                "r" ((USItype)(b)) \
 220        : "r0", "r1", "r2")
 221#else
 222#define umul_ppmm(xh, xl, a, b) \
 223        __asm__ ("%@ Inlined umul_ppmm\n" \
 224                "umull %r1, %r0, %r2, %r3" \
 225        : "=&r" ((USItype)(xh)), \
 226                        "=r" ((USItype)(xl)) \
 227        : "r" ((USItype)(a)), \
 228                        "r" ((USItype)(b)) \
 229        : "r0", "r1")
 230#endif
 231#define UMUL_TIME 20
 232#define UDIV_TIME 100
 233#endif /* __arm__ */
 234
 235/***************************************
 236        **************  CLIPPER  **************
 237        ***************************************/
 238#if defined(__clipper__) && W_TYPE_SIZE == 32
 239#define umul_ppmm(w1, w0, u, v) \
 240        ({union {UDItype __ll; \
 241                struct {USItype __l, __h; } __i; \
 242        } __xx; \
 243        __asm__ ("mulwux %2,%0" \
 244        : "=r" (__xx.__ll) \
 245        : "%0" ((USItype)(u)), \
 246                "r" ((USItype)(v))); \
 247        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
 248#define smul_ppmm(w1, w0, u, v) \
 249        ({union {DItype __ll; \
 250                struct {SItype __l, __h; } __i; \
 251        } __xx; \
 252        __asm__ ("mulwx %2,%0" \
 253        : "=r" (__xx.__ll) \
 254        : "%0" ((SItype)(u)), \
 255                "r" ((SItype)(v))); \
 256        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
 257#define __umulsidi3(u, v) \
 258        ({UDItype __w; \
 259        __asm__ ("mulwux %2,%0" \
 260        : "=r" (__w) \
 261        : "%0" ((USItype)(u)), \
 262                "r" ((USItype)(v))); \
 263        __w; })
 264#endif /* __clipper__ */
 265
 266/***************************************
 267        **************  GMICRO  ***************
 268        ***************************************/
 269#if defined(__gmicro__) && W_TYPE_SIZE == 32
 270#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 271        __asm__ ("add.w %5,%1\n" \
 272                "addx %3,%0" \
 273        : "=g" ((USItype)(sh)), \
 274                "=&g" ((USItype)(sl)) \
 275        : "%0" ((USItype)(ah)), \
 276                "g" ((USItype)(bh)), \
 277                "%1" ((USItype)(al)), \
 278                "g" ((USItype)(bl)))
 279#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 280        __asm__ ("sub.w %5,%1\n" \
 281                "subx %3,%0" \
 282        : "=g" ((USItype)(sh)), \
 283                "=&g" ((USItype)(sl)) \
 284        : "0" ((USItype)(ah)), \
 285                "g" ((USItype)(bh)), \
 286                "1" ((USItype)(al)), \
 287                "g" ((USItype)(bl)))
 288#define umul_ppmm(ph, pl, m0, m1) \
 289        __asm__ ("mulx %3,%0,%1" \
 290        : "=g" ((USItype)(ph)), \
 291                "=r" ((USItype)(pl)) \
 292        : "%0" ((USItype)(m0)), \
 293                "g" ((USItype)(m1)))
 294#define udiv_qrnnd(q, r, nh, nl, d) \
 295        __asm__ ("divx %4,%0,%1" \
 296        : "=g" ((USItype)(q)), \
 297                "=r" ((USItype)(r)) \
 298        : "1" ((USItype)(nh)), \
 299                "0" ((USItype)(nl)), \
 300                "g" ((USItype)(d)))
 301#define count_leading_zeros(count, x) \
 302        __asm__ ("bsch/1 %1,%0" \
 303        : "=g" (count) \
 304        : "g" ((USItype)(x)), \
 305             "0" ((USItype)0))
 306#endif
 307
 308/***************************************
 309        **************  HPPA  *****************
 310        ***************************************/
 311#if defined(__hppa) && W_TYPE_SIZE == 32
 312#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 313        __asm__ ("add %4,%5,%1\n" \
 314                   "addc %2,%3,%0" \
 315        : "=r" ((USItype)(sh)), \
 316             "=&r" ((USItype)(sl)) \
 317        : "%rM" ((USItype)(ah)), \
 318             "rM" ((USItype)(bh)), \
 319             "%rM" ((USItype)(al)), \
 320             "rM" ((USItype)(bl)))
 321#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 322        __asm__ ("sub %4,%5,%1\n" \
 323           "subb %2,%3,%0" \
 324        : "=r" ((USItype)(sh)), \
 325             "=&r" ((USItype)(sl)) \
 326        : "rM" ((USItype)(ah)), \
 327             "rM" ((USItype)(bh)), \
 328             "rM" ((USItype)(al)), \
 329             "rM" ((USItype)(bl)))
 330#if defined(_PA_RISC1_1)
 331#define umul_ppmm(wh, wl, u, v) \
 332do { \
 333        union {UDItype __ll; \
 334        struct {USItype __h, __l; } __i; \
 335        } __xx; \
 336        __asm__ ("xmpyu %1,%2,%0" \
 337        : "=*f" (__xx.__ll) \
 338        : "*f" ((USItype)(u)), \
 339               "*f" ((USItype)(v))); \
 340        (wh) = __xx.__i.__h; \
 341        (wl) = __xx.__i.__l; \
 342} while (0)
 343#define UMUL_TIME 8
 344#define UDIV_TIME 60
 345#else
 346#define UMUL_TIME 40
 347#define UDIV_TIME 80
 348#endif
 349#ifndef LONGLONG_STANDALONE
 350#define udiv_qrnnd(q, r, n1, n0, d) \
 351do { USItype __r; \
 352        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
 353        (r) = __r; \
 354} while (0)
 355extern USItype __udiv_qrnnd();
 356#endif /* LONGLONG_STANDALONE */
 357#define count_leading_zeros(count, x) \
 358do { \
 359        USItype __tmp; \
 360        __asm__ ( \
 361        "ldi             1,%0\n" \
 362        "extru,=        %1,15,16,%%r0  ; Bits 31..16 zero?\n" \
 363        "extru,tr       %1,15,16,%1    ; No.  Shift down, skip add.\n" \
 364        "ldo            16(%0),%0      ; Yes.   Perform add.\n" \
 365        "extru,=        %1,23,8,%%r0   ; Bits 15..8 zero?\n" \
 366        "extru,tr       %1,23,8,%1     ; No.  Shift down, skip add.\n" \
 367        "ldo            8(%0),%0       ; Yes.   Perform add.\n" \
 368        "extru,=        %1,27,4,%%r0   ; Bits 7..4 zero?\n" \
 369        "extru,tr       %1,27,4,%1     ; No.  Shift down, skip add.\n" \
 370        "ldo            4(%0),%0       ; Yes.   Perform add.\n" \
 371        "extru,=        %1,29,2,%%r0   ; Bits 3..2 zero?\n" \
 372        "extru,tr       %1,29,2,%1     ; No.  Shift down, skip add.\n" \
 373        "ldo            2(%0),%0       ; Yes.   Perform add.\n" \
 374        "extru          %1,30,1,%1     ; Extract bit 1.\n" \
 375        "sub            %0,%1,%0       ; Subtract it.              " \
 376        : "=r" (count), "=r" (__tmp) : "1" (x)); \
 377} while (0)
 378#endif /* hppa */
 379
 380/***************************************
 381        **************  I370  *****************
 382        ***************************************/
 383#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
 384#define umul_ppmm(xh, xl, m0, m1) \
 385do { \
 386        union {UDItype __ll; \
 387           struct {USItype __h, __l; } __i; \
 388        } __xx; \
 389        USItype __m0 = (m0), __m1 = (m1); \
 390        __asm__ ("mr %0,%3" \
 391        : "=r" (__xx.__i.__h), \
 392               "=r" (__xx.__i.__l) \
 393        : "%1" (__m0), \
 394               "r" (__m1)); \
 395        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
 396        (xh) += ((((SItype) __m0 >> 31) & __m1) \
 397             + (((SItype) __m1 >> 31) & __m0)); \
 398} while (0)
 399#define smul_ppmm(xh, xl, m0, m1) \
 400do { \
 401        union {DItype __ll; \
 402           struct {USItype __h, __l; } __i; \
 403        } __xx; \
 404        __asm__ ("mr %0,%3" \
 405        : "=r" (__xx.__i.__h), \
 406               "=r" (__xx.__i.__l) \
 407        : "%1" (m0), \
 408               "r" (m1)); \
 409        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
 410} while (0)
 411#define sdiv_qrnnd(q, r, n1, n0, d) \
 412do { \
 413        union {DItype __ll; \
 414           struct {USItype __h, __l; } __i; \
 415        } __xx; \
 416        __xx.__i.__h = n1; __xx.__i.__l = n0; \
 417        __asm__ ("dr %0,%2" \
 418        : "=r" (__xx.__ll) \
 419        : "0" (__xx.__ll), "r" (d)); \
 420        (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
 421} while (0)
 422#endif
 423
 424/***************************************
 425        **************  I386  *****************
 426        ***************************************/
 427#undef __i386__
 428#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
 429#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 430        __asm__ ("addl %5,%1\n" \
 431           "adcl %3,%0" \
 432        : "=r" ((USItype)(sh)), \
 433             "=&r" ((USItype)(sl)) \
 434        : "%0" ((USItype)(ah)), \
 435             "g" ((USItype)(bh)), \
 436             "%1" ((USItype)(al)), \
 437             "g" ((USItype)(bl)))
 438#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 439        __asm__ ("subl %5,%1\n" \
 440           "sbbl %3,%0" \
 441        : "=r" ((USItype)(sh)), \
 442             "=&r" ((USItype)(sl)) \
 443        : "0" ((USItype)(ah)), \
 444             "g" ((USItype)(bh)), \
 445             "1" ((USItype)(al)), \
 446             "g" ((USItype)(bl)))
 447#define umul_ppmm(w1, w0, u, v) \
 448        __asm__ ("mull %3" \
 449        : "=a" ((USItype)(w0)), \
 450             "=d" ((USItype)(w1)) \
 451        : "%0" ((USItype)(u)), \
 452             "rm" ((USItype)(v)))
 453#define udiv_qrnnd(q, r, n1, n0, d) \
 454        __asm__ ("divl %4" \
 455        : "=a" ((USItype)(q)), \
 456             "=d" ((USItype)(r)) \
 457        : "0" ((USItype)(n0)), \
 458             "1" ((USItype)(n1)), \
 459             "rm" ((USItype)(d)))
 460#define count_leading_zeros(count, x) \
 461do { \
 462        USItype __cbtmp; \
 463        __asm__ ("bsrl %1,%0" \
 464        : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
 465        (count) = __cbtmp ^ 31; \
 466} while (0)
 467#define count_trailing_zeros(count, x) \
 468        __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
 469#ifndef UMUL_TIME
 470#define UMUL_TIME 40
 471#endif
 472#ifndef UDIV_TIME
 473#define UDIV_TIME 40
 474#endif
 475#endif /* 80x86 */
 476
 477/***************************************
 478        **************  I860  *****************
 479        ***************************************/
 480#if defined(__i860__) && W_TYPE_SIZE == 32
 481#define rshift_rhlc(r, h, l, c) \
 482        __asm__ ("shr %3,r0,r0\n" \
 483        "shrd %1,%2,%0" \
 484           "=r" (r) : "r" (h), "r" (l), "rn" (c))
 485#endif /* i860 */
 486
 487/***************************************
 488        **************  I960  *****************
 489        ***************************************/
 490#if defined(__i960__) && W_TYPE_SIZE == 32
 491#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 492        __asm__ ("cmpo 1,0\n" \
 493        "addc %5,%4,%1\n" \
 494        "addc %3,%2,%0" \
 495        : "=r" ((USItype)(sh)), \
 496             "=&r" ((USItype)(sl)) \
 497        : "%dI" ((USItype)(ah)), \
 498             "dI" ((USItype)(bh)), \
 499             "%dI" ((USItype)(al)), \
 500             "dI" ((USItype)(bl)))
 501#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 502        __asm__ ("cmpo 0,0\n" \
 503        "subc %5,%4,%1\n" \
 504        "subc %3,%2,%0" \
 505        : "=r" ((USItype)(sh)), \
 506             "=&r" ((USItype)(sl)) \
 507        : "dI" ((USItype)(ah)), \
 508             "dI" ((USItype)(bh)), \
 509             "dI" ((USItype)(al)), \
 510             "dI" ((USItype)(bl)))
 511#define umul_ppmm(w1, w0, u, v) \
 512        ({union {UDItype __ll; \
 513           struct {USItype __l, __h; } __i; \
 514        } __xx; \
 515        __asm__ ("emul        %2,%1,%0" \
 516        : "=d" (__xx.__ll) \
 517        : "%dI" ((USItype)(u)), \
 518             "dI" ((USItype)(v))); \
 519        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
 520#define __umulsidi3(u, v) \
 521        ({UDItype __w; \
 522        __asm__ ("emul      %2,%1,%0" \
 523        : "=d" (__w) \
 524        : "%dI" ((USItype)(u)), \
 525               "dI" ((USItype)(v))); \
 526        __w; })
 527#define udiv_qrnnd(q, r, nh, nl, d) \
 528do { \
 529        union {UDItype __ll; \
 530           struct {USItype __l, __h; } __i; \
 531        } __nn; \
 532        __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
 533        __asm__ ("ediv %d,%n,%0" \
 534        : "=d" (__rq.__ll) \
 535        : "dI" (__nn.__ll), \
 536             "dI" ((USItype)(d))); \
 537        (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
 538} while (0)
 539#define count_leading_zeros(count, x) \
 540do { \
 541        USItype __cbtmp; \
 542        __asm__ ("scanbit %1,%0" \
 543        : "=r" (__cbtmp) \
 544        : "r" ((USItype)(x))); \
 545        (count) = __cbtmp ^ 31; \
 546} while (0)
 547#define COUNT_LEADING_ZEROS_0 (-32)     /* sic */
 548#if defined(__i960mx)           /* what is the proper symbol to test??? */
 549#define rshift_rhlc(r, h, l, c) \
 550do { \
 551        union {UDItype __ll; \
 552           struct {USItype __l, __h; } __i; \
 553        } __nn; \
 554        __nn.__i.__h = (h); __nn.__i.__l = (l); \
 555        __asm__ ("shre %2,%1,%0" \
 556        : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
 557}
 558#endif /* i960mx */
 559#endif /* i960 */
 560
 561/***************************************
 562        **************  68000   ****************
 563        ***************************************/
 564#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
 565#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 566        __asm__ ("add%.l %5,%1\n" \
 567           "addx%.l %3,%0" \
 568        : "=d" ((USItype)(sh)), \
 569             "=&d" ((USItype)(sl)) \
 570        : "%0" ((USItype)(ah)), \
 571             "d" ((USItype)(bh)), \
 572             "%1" ((USItype)(al)), \
 573             "g" ((USItype)(bl)))
 574#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 575        __asm__ ("sub%.l %5,%1\n" \
 576           "subx%.l %3,%0" \
 577        : "=d" ((USItype)(sh)), \
 578             "=&d" ((USItype)(sl)) \
 579        : "0" ((USItype)(ah)), \
 580             "d" ((USItype)(bh)), \
 581             "1" ((USItype)(al)), \
 582             "g" ((USItype)(bl)))
 583#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
 584#define umul_ppmm(w1, w0, u, v) \
 585        __asm__ ("mulu%.l %3,%1:%0" \
 586        : "=d" ((USItype)(w0)), \
 587             "=d" ((USItype)(w1)) \
 588        : "%0" ((USItype)(u)), \
 589             "dmi" ((USItype)(v)))
 590#define UMUL_TIME 45
 591#define udiv_qrnnd(q, r, n1, n0, d) \
 592        __asm__ ("divu%.l %4,%1:%0" \
 593        : "=d" ((USItype)(q)), \
 594             "=d" ((USItype)(r)) \
 595        : "0" ((USItype)(n0)), \
 596             "1" ((USItype)(n1)), \
 597             "dmi" ((USItype)(d)))
 598#define UDIV_TIME 90
 599#define sdiv_qrnnd(q, r, n1, n0, d) \
 600        __asm__ ("divs%.l %4,%1:%0" \
 601        : "=d" ((USItype)(q)), \
 602             "=d" ((USItype)(r)) \
 603        : "0" ((USItype)(n0)), \
 604             "1" ((USItype)(n1)), \
 605             "dmi" ((USItype)(d)))
 606#define count_leading_zeros(count, x) \
 607        __asm__ ("bfffo %1{%b2:%b2},%0" \
 608        : "=d" ((USItype)(count)) \
 609        : "od" ((USItype)(x)), "n" (0))
 610#define COUNT_LEADING_ZEROS_0 32
 611#else /* not mc68020 */
 612#define umul_ppmm(xh, xl, a, b) \
 613do { USItype __umul_tmp1, __umul_tmp2; \
 614        __asm__ ("| Inlined umul_ppmm\n" \
 615        "move%.l %5,%3\n" \
 616        "move%.l %2,%0\n" \
 617        "move%.w %3,%1\n" \
 618        "swap   %3\n" \
 619        "swap   %0\n" \
 620        "mulu   %2,%1\n" \
 621        "mulu   %3,%0\n" \
 622        "mulu   %2,%3\n" \
 623        "swap   %2\n" \
 624        "mulu   %5,%2\n" \
 625        "add%.l %3,%2\n" \
 626        "jcc    1f\n" \
 627        "add%.l %#0x10000,%0\n" \
 628        "1:     move%.l %2,%3\n" \
 629        "clr%.w %2\n" \
 630        "swap   %2\n" \
 631        "swap   %3\n" \
 632        "clr%.w %3\n" \
 633        "add%.l %3,%1\n" \
 634        "addx%.l %2,%0\n" \
 635        "| End inlined umul_ppmm" \
 636        : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
 637                "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
 638        : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
 639} while (0)
 640#define UMUL_TIME 100
 641#define UDIV_TIME 400
 642#endif /* not mc68020 */
 643#endif /* mc68000 */
 644
 645/***************************************
 646        **************  88000   ****************
 647        ***************************************/
 648#if defined(__m88000__) && W_TYPE_SIZE == 32
 649#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 650        __asm__ ("addu.co %1,%r4,%r5\n" \
 651           "addu.ci %0,%r2,%r3" \
 652        : "=r" ((USItype)(sh)), \
 653             "=&r" ((USItype)(sl)) \
 654        : "%rJ" ((USItype)(ah)), \
 655             "rJ" ((USItype)(bh)), \
 656             "%rJ" ((USItype)(al)), \
 657             "rJ" ((USItype)(bl)))
 658#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 659        __asm__ ("subu.co %1,%r4,%r5\n" \
 660           "subu.ci %0,%r2,%r3" \
 661        : "=r" ((USItype)(sh)), \
 662             "=&r" ((USItype)(sl)) \
 663        : "rJ" ((USItype)(ah)), \
 664             "rJ" ((USItype)(bh)), \
 665             "rJ" ((USItype)(al)), \
 666             "rJ" ((USItype)(bl)))
 667#define count_leading_zeros(count, x) \
 668do { \
 669        USItype __cbtmp; \
 670        __asm__ ("ff1 %0,%1" \
 671        : "=r" (__cbtmp) \
 672        : "r" ((USItype)(x))); \
 673        (count) = __cbtmp ^ 31; \
 674} while (0)
 675#define COUNT_LEADING_ZEROS_0 63        /* sic */
 676#if defined(__m88110__)
 677#define umul_ppmm(wh, wl, u, v) \
 678do { \
 679        union {UDItype __ll; \
 680           struct {USItype __h, __l; } __i; \
 681        } __x; \
 682        __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
 683        (wh) = __x.__i.__h; \
 684        (wl) = __x.__i.__l; \
 685} while (0)
 686#define udiv_qrnnd(q, r, n1, n0, d) \
 687        ({union {UDItype __ll; \
 688           struct {USItype __h, __l; } __i; \
 689        } __x, __q; \
 690        __x.__i.__h = (n1); __x.__i.__l = (n0); \
 691        __asm__ ("divu.d %0,%1,%2" \
 692        : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
 693        (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
 694#define UMUL_TIME 5
 695#define UDIV_TIME 25
 696#else
 697#define UMUL_TIME 17
 698#define UDIV_TIME 150
 699#endif /* __m88110__ */
 700#endif /* __m88000__ */
 701
 702/***************************************
 703        **************  MIPS  *****************
 704        ***************************************/
 705#if defined(__mips__) && W_TYPE_SIZE == 32
 706#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
 707#define umul_ppmm(w1, w0, u, v) \
 708        __asm__ ("multu %2,%3" \
 709        : "=l" ((USItype)(w0)), \
 710             "=h" ((USItype)(w1)) \
 711        : "d" ((USItype)(u)), \
 712             "d" ((USItype)(v)))
 713#else
 714#define umul_ppmm(w1, w0, u, v) \
 715        __asm__ ("multu %2,%3\n" \
 716           "mflo %0\n" \
 717           "mfhi %1" \
 718        : "=d" ((USItype)(w0)), \
 719             "=d" ((USItype)(w1)) \
 720        : "d" ((USItype)(u)), \
 721             "d" ((USItype)(v)))
 722#endif
 723#define UMUL_TIME 10
 724#define UDIV_TIME 100
 725#endif /* __mips__ */
 726
 727/***************************************
 728        **************  MIPS/64  **************
 729        ***************************************/
 730#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
 731#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
 732#define umul_ppmm(w1, w0, u, v) \
 733        __asm__ ("dmultu %2,%3" \
 734        : "=l" ((UDItype)(w0)), \
 735             "=h" ((UDItype)(w1)) \
 736        : "d" ((UDItype)(u)), \
 737             "d" ((UDItype)(v)))
 738#else
 739#define umul_ppmm(w1, w0, u, v) \
 740        __asm__ ("dmultu %2,%3\n" \
 741           "mflo %0\n" \
 742           "mfhi %1" \
 743        : "=d" ((UDItype)(w0)), \
 744             "=d" ((UDItype)(w1)) \
 745        : "d" ((UDItype)(u)), \
 746             "d" ((UDItype)(v)))
 747#endif
 748#define UMUL_TIME 20
 749#define UDIV_TIME 140
 750#endif /* __mips__ */
 751
 752/***************************************
 753        **************  32000   ****************
 754        ***************************************/
 755#if defined(__ns32000__) && W_TYPE_SIZE == 32
 756#define umul_ppmm(w1, w0, u, v) \
 757        ({union {UDItype __ll; \
 758           struct {USItype __l, __h; } __i; \
 759        } __xx; \
 760        __asm__ ("meid %2,%0" \
 761        : "=g" (__xx.__ll) \
 762        : "%0" ((USItype)(u)), \
 763             "g" ((USItype)(v))); \
 764        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
 765#define __umulsidi3(u, v) \
 766        ({UDItype __w; \
 767        __asm__ ("meid %2,%0" \
 768        : "=g" (__w) \
 769        : "%0" ((USItype)(u)), \
 770               "g" ((USItype)(v))); \
 771        __w; })
 772#define udiv_qrnnd(q, r, n1, n0, d) \
 773        ({union {UDItype __ll; \
 774           struct {USItype __l, __h; } __i; \
 775        } __xx; \
 776        __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
 777        __asm__ ("deid %2,%0" \
 778        : "=g" (__xx.__ll) \
 779        : "0" (__xx.__ll), \
 780             "g" ((USItype)(d))); \
 781        (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 782#define count_trailing_zeros(count, x) \
 783do { \
 784        __asm__("ffsd      %2,%0" \
 785        : "=r"((USItype) (count)) \
 786        : "0"((USItype) 0), "r"((USItype) (x))); \
 787        } while (0)
 788#endif /* __ns32000__ */
 789
 790/***************************************
 791        **************  PPC  ******************
 792        ***************************************/
#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
/*
 * 32-bit PowerPC / POWER.  The {old|new} braces in the asm templates
 * select POWER vs PowerPC mnemonics; %I lets the assembler use the
 * immediate form when the operand is constant.
 *
 * add_ssaaaa: when bh is a compile-time 0 or ~0, the high-word add can
 * use addze/addme (add-to-zero/minus-one extended), saving a register
 * for bh; otherwise the generic addc/adde pair is emitted.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		"%r" ((USItype)(al)), \
		"rI" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		"%r" ((USItype)(al)), \
		"rI" ((USItype)(bl))); \
	else \
		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		"r" ((USItype)(bh)), \
		"%r" ((USItype)(al)), \
		"rI" ((USItype)(bl))); \
} while (0)
/*
 * sub_ddmmss: implemented with subtract-from-carrying (subfc/subfe);
 * like add_ssaaaa, compile-time 0 / ~0 high words are special-cased
 * with subfze/subfme/addme/addze to drop one register operand.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(ah) && (ah) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "r" ((USItype)(bh)), \
		"rI" ((USItype)(al)), \
		"r" ((USItype)(bl))); \
	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "r" ((USItype)(bh)), \
		"rI" ((USItype)(al)), \
		"r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		"rI" ((USItype)(al)), \
		"r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		"rI" ((USItype)(al)), \
		"r" ((USItype)(bl))); \
	else \
		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
		: "=r" ((USItype)(sh)), \
		"=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		"r" ((USItype)(bh)), \
		"rI" ((USItype)(al)), \
		"r" ((USItype)(bl))); \
} while (0)
/* cntlzw: count leading zero bits; x == 0 yields 32 (see below). */
#define count_leading_zeros(count, x) \
	__asm__ ("{cntlz|cntlzw} %0,%1" \
	: "=r" ((USItype)(count)) \
	: "r" ((USItype)(x)))
#define COUNT_LEADING_ZEROS_0 32
#if defined(_ARCH_PPC)
/* PowerPC: mulhwu gives the high 32 bits of the unsigned product; the
 * low word comes from a plain C multiply.
 */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhwu %0,%1,%2" \
	: "=r" ((USItype) ph) \
	: "%r" (__m0), \
	"r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 15
/* mulhw: high 32 bits of the signed product. */
#define smul_ppmm(ph, pl, m0, m1) \
do { \
	SItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhw %0,%1,%2" \
	: "=r" ((SItype) ph) \
	: "%r" (__m0), \
	"r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
/* Old POWER: 'mul' produces a signed 64-bit product in a register
 * pair (hence the "q" constraint); the additions below convert the
 * high word from the signed to the unsigned product.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mul %0,%2,%3" \
	: "=r" ((USItype)(xh)), \
	"=q" ((USItype)(xl)) \
	: "r" (__m0), \
	"r" (__m1)); \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
	+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 8
/* Signed 64-bit product, no correction needed. */
#define smul_ppmm(xh, xl, m0, m1) \
	__asm__ ("mul %0,%2,%3" \
	: "=r" ((SItype)(xh)), \
	"=q" ((SItype)(xl)) \
	: "r" (m0), \
	"r" (m1))
#define SMUL_TIME 4
/* Signed divide of the two-word value nh:nl by d (quotient + remainder). */
#define sdiv_qrnnd(q, r, nh, nl, d) \
	__asm__ ("div %0,%2,%4" \
	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
#define UDIV_TIME 100
#endif
#endif /* Power architecture variants.  */
 912
 913/***************************************
 914        **************  PYR  ******************
 915        ***************************************/
#if defined(__pyr__) && W_TYPE_SIZE == 32
/* Pyramid: two-word add/subtract via addw/addwc (add with carry) and
 * subw/subwb (subtract with borrow); the "%0"/"%1" input constraints
 * tie ah/al into the output registers.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addw        %5,%1\n" \
	"addwc  %3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"%1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subw        %5,%1\n" \
	"subwb  %3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
/* uemul: unsigned 32x32->64 multiply into a register pair; the movw
 * first places the multiplicand in the low half (%R0) of the pair.
 */
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
	struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("movw %1,%R0\n" \
	"uemul %2,%0" \
	: "=&r" (__xx.__ll) \
	: "g" ((USItype) (u)), \
	"g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#endif /* __pyr__ */
 947
 948/***************************************
 949        **************  RT/ROMP  **************
 950        ***************************************/
#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
/* IBM RT/ROMP: two-word add with carry ('a'/'ae') and subtract with
 * borrow ('s'/'se'); "%0"/"%1" tie ah/al into the output registers.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("a %1,%5\n" \
	"ae %0,%3" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	"r" ((USItype)(bh)), \
	"%1" ((USItype)(al)), \
	"r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("s %1,%5\n" \
	"se %0,%3" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	"r" ((USItype)(bh)), \
	"1" ((USItype)(al)), \
	"r" ((USItype)(bl)))
/*
 * umul_ppmm: 's r2,r2' clears r2, 'mts r10,%2' loads the multiplier,
 * then sixteen 'm' multiply-step instructions develop the product
 * (NOTE(review): presumably two multiplier bits per step for the
 * 32-bit operand — confirm against the ROMP manual), 'cas'/'mfs'
 * retrieve the halves.  The hardware result is signed; the final
 * additions correct the high word to the unsigned product.
 */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ( \
	"s       r2,r2\n" \
	"mts    r10,%2\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"m      r2,%3\n" \
	"cas    %0,r2,r0\n" \
	"mfs    r10,%1" \
	: "=r" ((USItype)(ph)), \
	"=r" ((USItype)(pl)) \
	: "%r" (__m0), \
	"r" (__m1) \
	: "r2"); \
	(ph) += ((((SItype) __m0 >> 31) & __m1) \
	+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
/* 'clz' evidently counts within a 16-bit half-word (the code selects a
 * half and adjusts by 16 for the low half) — NOTE(review): confirm the
 * exact clz semantics against the ROMP manual.
 */
#define count_leading_zeros(count, x) \
do { \
	if ((x) >= 0x10000) \
		__asm__ ("clz     %0,%1" \
		: "=r" ((USItype)(count)) \
		: "r" ((USItype)(x) >> 16)); \
	else { \
		__asm__ ("clz   %0,%1" \
		: "=r" ((USItype)(count)) \
		: "r" ((USItype)(x))); \
		(count) += 16; \
	} \
} while (0)
#endif /* RT/ROMP */
1018
1019/***************************************
1020        **************  SH2  ******************
1021        ***************************************/
#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
	&& W_TYPE_SIZE == 32
/* SuperH: dmulu.l leaves the 64-bit unsigned product in the MACH:MACL
 * register pair, copied out with sts (hence the macl/mach clobbers).
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ( \
	"dmulu.l %2,%3\n" \
	"sts    macl,%1\n" \
	"sts    mach,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)) \
	: "macl", "mach")
#define UMUL_TIME 5
#endif
1036
1037/***************************************
1038        **************  SPARC   ****************
1039        ***************************************/
#if defined(__sparc__) && W_TYPE_SIZE == 32
/* SPARC: addcc/subcc set the carry flag, addx/subx consume it; the
 * "rJ" constraints additionally accept the constant zero (emitted as
 * %g0).  Condition codes are clobbered.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n" \
	"addx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "%rJ" ((USItype)(ah)), \
	"rI" ((USItype)(bh)), \
	"%rJ" ((USItype)(al)), \
	"rI" ((USItype)(bl)) \
	__CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n" \
	"subx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "rJ" ((USItype)(ah)), \
	"rI" ((USItype)(bh)), \
	"rJ" ((USItype)(al)), \
	"rI" ((USItype)(bl)) \
	__CLOBBER_CC)
#if defined(__sparc_v8__)
/* Don't match immediate range because, 1) it is not often useful,
	2) the 'I' flag thinks of the range as a 13 bit signed interval,
	while we want to match a 13 bit interval, sign extended to 32 bits,
	but INTERPRETED AS UNSIGNED.  */
/* V8 hardware multiply: umul leaves the low word in the destination
 * register and the high word in %y, read back with rd.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)))
#define UMUL_TIME 5
#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */
/* udiv divides the %y:n0 double word by d; %y is written first and the
 * three nops cover its delayed-write window.  The remainder is then
 * recovered in C from the quotient.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
	USItype __q; \
	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
	: "=r" ((USItype)(__q)) \
	: "r" ((USItype)(n1)), \
	"r" ((USItype)(n0)), \
	"r" ((USItype)(d))); \
	(r) = (n0) - __q * (d); \
	(q) = __q; \
} while (0)
#define UDIV_TIME 25
#endif /* SUPERSPARC */
#else /* ! __sparc_v8__ */
#if defined(__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
	instructions scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)))
#define UMUL_TIME 5
/* udiv_qrnnd: 32 divscc (divide-step) instructions develop one
 * quotient bit each; the remainder is read from %y and, when the last
 * step left it needing correction, fixed up by the annulled branch
 * ("bl,a") that conditionally executes the add.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
	"tst	%%g0\n" \
	"divscc	%3,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%0\n" \
	"rd	%%y,%1\n" \
	"bl,a 1f\n" \
	"add	%1,%4,%1\n" \
	"1:	! End of inline udiv_qrnnd" \
	: "=r" ((USItype)(q)), \
	"=r" ((USItype)(r)) \
	: "r" ((USItype)(n1)), \
	"r" ((USItype)(n0)), \
	"rI" ((USItype)(d)) \
	: "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
1145#define count_leading_zeros(count, x) \
1146        __asm__ ("scan %1,0,%0" \
1147        : "=r" ((USItype)(x)) \
1148        : "r" ((USItype)(count)))
1149/* Early sparclites return 63 for an argument of 0, but they warn that future
1150        implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1151        undefined.  */
1152#endif /* __sparclite__ */
1153#endif /* __sparc_v8__ */
	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
#ifndef umul_ppmm
/* V7 has no multiply instruction: 32 mulscc multiply-step instructions
 * accumulate the product two operands at a time.  The sra/and prologue
 * builds a correction term (u when v is negative) that the final add
 * folds into the high word, converting mulscc's signed-flavoured
 * result into the unsigned product.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
	"wr     %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n" \
	"sra    %3,31,%%g2      ! Don't move this insn\n" \
	"and    %2,%%g2,%%g2    ! Don't move this insn\n" \
	"andcc  %%g0,0,%%g1     ! Don't move this insn\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,0,%%g1\n" \
	"add    %%g1,%%g2,%0\n" \
	"rd     %%y,%1" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "%rI" ((USItype)(u)), \
	"r" ((USItype)(v)) \
	: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39		/* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Shift-and-subtract division producing one quotient bit per
 * iteration (32 iterations); clobbers %g1 and the condition codes.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
	   "mov 32,%%g1\n\t"						\
	   "subcc	%1,%2,%%g0\n\t"					\
	   "1:	bcs	5f\n\t"						\
	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
	   "subcc	%%g1,1,%%g1\n\t"				\
	   "2:	bne	1b\n\t"						\
	   "subcc	%1,%2,%%g0\n\t"					\
	   "bcs	3f\n\t"							\
	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
	   "b		3f\n\t"						\
	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
	   "4:	sub	%1,%2,%1\n\t"					\
	   "5:	addxcc	%1,%1,%1\n\t"					\
	   "bcc	2b\n\t"							\
	   "subcc	%%g1,1,%%g1\n\t"				\
	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
	   "bne	4b\n\t"							\
	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
	   "sub	%1,%2,%1\n\t"						\
	   "3:	xnor	%0,0,%0\n\t"					\
	   "! End of inline udiv_qrnnd\n"				\
	   : "=&r" ((USItype)(q)),					\
	     "=&r" ((USItype)(r))					\
	   : "r" ((USItype)(d)),					\
	     "1" ((USItype)(n1)),					\
	     "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
#endif
#endif /* __sparc__ */
1238
1239/***************************************
1240        **************  VAX  ******************
1241        ***************************************/
#if defined(__vax__) && W_TYPE_SIZE == 32
/* VAX: addl2/adwc and subl2/sbwc implement the two-word add/subtract
 * with carry/borrow propagation.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
	"adwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	"=&g" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"%1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
	"sbwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	"=&g" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
/* emul is a signed 32x32->64 multiply; the trailing additions convert
 * the high word of the signed product to the unsigned one.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
	struct {USItype __l, __h; } __i; \
	} __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("emul %1,%2,$0,%0" \
	: "=g" (__xx.__ll) \
	: "g" (__m0), \
	"g" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
	+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
/* ediv: signed divide of the 64-bit n1:n0 by d, producing quotient and
 * remainder in one instruction.
 */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
	struct {SItype __l, __h; } __i; \
	} __xx; \
	__xx.__i.__h = n1; __xx.__i.__l = n0; \
	__asm__ ("ediv %3,%2,%0,%1" \
	: "=g" (q), "=g" (r) \
	: "g" (__xx.__ll), "g" (d)); \
} while (0)
#endif /* __vax__ */
1286
1287/***************************************
1288        **************  Z8000   ****************
1289        ***************************************/
#if defined(__z8000__) && W_TYPE_SIZE == 16
/* Zilog Z8000, 16-bit word size: add/adc and sub/sbc on the 16-bit
 * register halves selected by the %H modifier.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	"=&r" ((unsigned int)(sl)) \
	: "%0" ((unsigned int)(ah)), \
	"r" ((unsigned int)(bh)), \
	"%1" ((unsigned int)(al)), \
	"rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	"=&r" ((unsigned int)(sl)) \
	: "0" ((unsigned int)(ah)), \
	"r" ((unsigned int)(bh)), \
	"1" ((unsigned int)(al)), \
	"rQR" ((unsigned int)(bl)))
/* 'mult' yields a 32-bit signed product in a register pair; the
 * trailing additions correct the high half to the unsigned product.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {long int __ll; \
	struct {unsigned int __h, __l; } __i; \
	} __xx; \
	unsigned int __m0 = (m0), __m1 = (m1); \
	__asm__ ("mult      %S0,%H3" \
	: "=r" (__xx.__i.__h), \
	"=r" (__xx.__i.__l) \
	: "%1" (__m0), \
	"rQR" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((signed int) __m0 >> 15) & __m1) \
	+ (((signed int) __m1 >> 15) & __m0)); \
} while (0)
#endif /* __z8000__ */

#endif /* __GNUC__ */
1325
1326/***************************************
1327        ***********  Generic Versions   ********
1328        ***************************************/
#if !defined(umul_ppmm) && defined(__umulsidi3)
/*
 * Build umul_ppmm from a full-width multiply primitive: split the
 * double-word product of m0 and m1 into high word ph and low word pl.
 * The original expansion was a bare { ... } block, which breaks when
 * the macro is used as "if (c) umul_ppmm(...); else ..." (the trailing
 * semicolon terminates the if).  Use the conventional do/while (0)
 * wrapper and parenthesize the output operands.
 */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	UDWtype __ll = __umulsidi3(m0, m1); \
	(ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
	(pl) = (UWtype) __ll; \
} while (0)
#endif
1337
#if !defined(__umulsidi3)
/* Fallback full-width multiply: combine the two halves produced by
 * umul_ppmm into a single UDWtype value.
 */
#define __umulsidi3(u, v) \
	({UWtype __hi, __lo; \
	umul_ppmm(__hi, __lo, u, v); \
	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif
1344
1345        /* If this machine has no inline assembler, use C macros.  */
1346
#if !defined(add_ssaaaa)
/* Generic two-word add: (sh,sl) = (ah,al) + (bh,bl).  The carry out of
 * the low-word add is detected by the unsigned wrap test
 * (sum < addend) and folded into the high word.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __lo_sum = (al) + (bl); \
	\
	(sh) = (ah) + (bh) + (__lo_sum < (al)); \
	(sl) = __lo_sum; \
} while (0)
#endif
1356
#if !defined(sub_ddmmss)
/* Generic two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  A borrow
 * out of the low word shows up as unsigned wrap-around
 * (difference > minuend) and is subtracted from the high word.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __lo_diff = (al) - (bl); \
	\
	(sh) = (ah) - (bh) - (__lo_diff > (al)); \
	(sl) = __lo_diff; \
} while (0)
#endif
1366
#if !defined(umul_ppmm)
/*
 * Generic schoolbook multiply: split u and v into half-words, form the
 * four partial products, and accumulate them so that (w1,w0) holds the
 * full double-word product.  Pure C fallback used when no assembler
 * version was selected above.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
	UWtype __mul_u = (u), __mul_v = (v); \
	UHWtype __u_lo = __ll_lowpart(__mul_u); \
	UHWtype __u_hi = __ll_highpart(__mul_u); \
	UHWtype __v_lo = __ll_lowpart(__mul_v); \
	UHWtype __v_hi = __ll_highpart(__mul_v); \
	UWtype __p_ll = (UWtype) __u_lo * __v_lo; \
	UWtype __p_lh = (UWtype) __u_lo * __v_hi; \
	UWtype __p_hl = (UWtype) __u_hi * __v_lo; \
	UWtype __p_hh = (UWtype) __u_hi * __v_hi; \
	\
	/* Fold the top half of the low product into one middle term: */ \
	/* this first addition cannot overflow. */ \
	__p_lh += __ll_highpart(__p_ll); \
	/* Adding the other middle term can carry; if it does, credit */ \
	/* one unit of __ll_B to the high partial product. */ \
	__p_lh += __p_hl; \
	if (__p_lh < __p_hl) \
		__p_hh += __ll_B; \
	\
	(w1) = __p_hh + __ll_highpart(__p_lh); \
	(w0) = (__ll_lowpart(__p_lh) << W_TYPE_SIZE/2) + __ll_lowpart(__p_ll); \
} while (0)
#endif
1393
#if !defined(smul_ppmm)
/*
 * Generic signed multiply built on umul_ppmm: do the unsigned multiply
 * and subtract the usual two correction terms (v when u is negative,
 * u when v is negative) from the high word.
 * BUGFIX: the guard used to test !defined(umul_ppmm); umul_ppmm is
 * always defined by this point (the generic version above defines it
 * when no assembler version did), so this fallback could never take
 * effect.  Guard on smul_ppmm itself, as GMP's longlong.h does.
 */
#define smul_ppmm(w1, w0, u, v) \
do { \
	UWtype __w1; \
	UWtype __m0 = (u), __m1 = (v); \
	umul_ppmm(__w1, w0, __m0, __m1); \
	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
} while (0)
#endif
1404
	/* Define this unconditionally, so it can be used for debugging.  */
/*
 * __udiv_qrnnd_c(q, r, n1, n0, d): divide the two-word value n1:n0 by
 * d using only single-word operations, producing quotient q and
 * remainder r.  Classic half-word long division: each half of the
 * quotient is estimated by dividing by the high half of d and then
 * corrected at most twice.  d must be normalized (its high half
 * nonzero — note __d1 below would otherwise be 0 and the divisions
 * would trap); callers normalize when UDIV_NEEDS_NORMALIZATION is set.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
	__d1 = __ll_highpart(d); \
	__d0 = __ll_lowpart(d); \
	\
	__r1 = (n1) % __d1; \
	__q1 = (n1) / __d1; \
	__m = (UWtype) __q1 * __d0; \
	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
	if (__r1 < __m) { \
		__q1--, __r1 += (d); \
		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
		if (__r1 < __m) \
			__q1--, __r1 += (d); \
	} \
	__r1 -= __m; \
	\
	__r0 = __r1 % __d1; \
	__q0 = __r1 / __d1; \
	__m = (UWtype) __q0 * __d0; \
	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
	if (__r0 < __m) { \
		__q0--, __r0 += (d); \
		if (__r0 >= (d)) \
			if (__r0 < __m) \
				__q0--, __r0 += (d); \
	} \
	__r0 -= __m; \
	\
	(q) = (UWtype) __q1 * __ll_B | __q0; \
	(r) = __r0; \
} while (0)
1439
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
	__udiv_w_sdiv (defined in libgcc or elsewhere).  */
/* NOTE(review): __MPN(udiv_w_sdiv) must be provided by the library
 * build when this path is taken; it is not defined in this header.
 */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	UWtype __r; \
	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
	(r) = __r; \
} while (0)
#endif
1450
	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
/* The C fallback requires a normalized divisor, so advertise that with
 * UDIV_NEEDS_NORMALIZATION = 1.
 */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1456
/* NOTE(review): this #undef unconditionally discards any assembler
 * count_leading_zeros defined above, forcing the table-driven C
 * version below — presumably deliberate in this port so only the
 * __clz_tab table need be provided; confirm against the mpi build.
 */
#undef count_leading_zeros
#if !defined(count_leading_zeros)
	extern
#ifdef __STDC__
			const
#endif
			unsigned char __clz_tab[];
/* Locate the highest nonzero 8-bit window of x (by quarter-word
 * comparisons for <=32-bit words, byte scan otherwise), then finish
 * with the 256-entry lookup table.  __clz_tab must be defined
 * elsewhere in the library.
 */
#define count_leading_zeros(count, x) \
do { \
	UWtype __xr = (x); \
	UWtype __a; \
	\
	if (W_TYPE_SIZE <= 32) { \
		__a = __xr < ((UWtype) 1 << 2*__BITS4) \
		? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
		: (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
	} \
	else { \
		for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
			if (((__xr >> __a) & 0xff) != 0) \
				break; \
	} \
	\
	(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
} while (0)
	/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1485
#if !defined(count_trailing_zeros)
/* Fallback count_trailing_zeros built on count_leading_zeros (which
 * may itself be asm or the C version above): isolate the lowest set
 * bit with (v & -v), count the zeros above it, and convert that
 * leading-zero count into a trailing-zero count.  x is assumed
 * nonzero, as with count_leading_zeros.
 */
#define count_trailing_zeros(count, x) \
do { \
	UWtype __tz_val = (x); \
	UWtype __tz_lz; \
	count_leading_zeros(__tz_lz, __tz_val & -__tz_val); \
	(count) = W_TYPE_SIZE - 1 - __tz_lz; \
} while (0)
#endif
1497
/* Default: udiv_qrnnd takes an unnormalized divisor; the fallback
 * above sets this to 1 where __udiv_qrnnd_c is used instead.
 */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
1501