linux/arch/blackfin/lib/udivsi3.S
<<
>>
Prefs
   1/*
   2 * File:         arch/blackfin/lib/udivsi3.S
   3 * Based on:
   4 * Author:
   5 *
   6 * Created:
   7 * Description:
   8 *
   9 * Modified:
  10 *               Copyright 2004-2006 Analog Devices Inc.
  11 *
  12 * Bugs:         Enter bugs at http://blackfin.uclinux.org/
  13 *
  14 * This program is free software; you can redistribute it and/or modify
  15 * it under the terms of the GNU General Public License as published by
  16 * the Free Software Foundation; either version 2 of the License, or
  17 * (at your option) any later version.
  18 *
  19 * This program is distributed in the hope that it will be useful,
  20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 * GNU General Public License for more details.
  23 *
  24 * You should have received a copy of the GNU General Public License
  25 * along with this program; if not, see the file COPYING, or write
  26 * to the Free Software Foundation, Inc.,
  27 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  28 */
  29
  30#include <linux/linkage.h>
  31
  32#define CARRY AC0
  33
  34#ifdef CONFIG_ARITHMETIC_OPS_L1
  35.section .l1.text
  36#else
  37.text
  38#endif
  39
  40
  41ENTRY(___udivsi3)
  42
  43  CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
  44  IF CC JUMP .Lreturn_ident;
  45
  46  R2 = R1 << 16;
  47  CC = R2 <= R0 (IU);
  48  IF CC JUMP .Lidents;
  49
  50  R2 = R0 >> 31;       /* if X is a 31-bit number */
  51  R3 = R1 >> 15;       /* and Y is a 15-bit number */
  52  R2 = R2 | R3;        /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
  53  CC = R2;
  54  IF CC JUMP .Ly_16bit;
  55
  56/* METHOD 1: FAST DIVQ
  57   We know we have a 31-bit dividend, and 15-bit divisor so we can use the
  58   simple divq approach (first setting AQ to 0 - implying unsigned division,
  59   then 16 DIVQ's).
  60*/
  61
  62  AQ = CC;             /* Clear AQ (CC==0) */
  63
  64/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
  65   we need to shift the dividend one bit to the left.
  66   We have already checked that we have a 31-bit number so we are safe to do
  67   that.
  68*/
  69  R0 <<= 1;
  70  DIVQ(R0, R1); // 1
  71  DIVQ(R0, R1); // 2
  72  DIVQ(R0, R1); // 3
  73  DIVQ(R0, R1); // 4
  74  DIVQ(R0, R1); // 5
  75  DIVQ(R0, R1); // 6
  76  DIVQ(R0, R1); // 7
  77  DIVQ(R0, R1); // 8
  78  DIVQ(R0, R1); // 9
  79  DIVQ(R0, R1); // 10
  80  DIVQ(R0, R1); // 11
  81  DIVQ(R0, R1); // 12
  82  DIVQ(R0, R1); // 13
  83  DIVQ(R0, R1); // 14
  84  DIVQ(R0, R1); // 15
  85  DIVQ(R0, R1); // 16
  86  R0 = R0.L (Z);
  87  RTS;
  88
  89.Ly_16bit:
  90  /* We know that the upper 17 bits of Y might have bits set,
  91  ** or that the sign bit of X might have a bit. If Y is a
  92  ** 16-bit number, but not bigger, then we can use the builtins
  93  ** with a post-divide correction.
  94  ** R3 currently holds Y>>15, which means R3's LSB is the
  95  ** bit we're interested in.
  96  */
  97
  98  /* According to the ISR, to use the Divide primitives for
  99  ** unsigned integer divide, the useable range is 31 bits
 100  */
 101  CC = ! BITTST(R0, 31);
 102
 103  /* IF condition is true we can scale our inputs and use the divide primitives,
 104  ** with some post-adjustment
 105  */
 106  R3 += -1;             /* if so, Y is 0x00008nnn */
 107  CC &= AZ;
 108
 109  /* If condition is true we can scale our inputs and use the divide primitives,
 110  ** with some post-adjustment
 111  */
 112  R3 = R1 >> 1;         /* Pre-scaled divisor for primitive case */
 113  R2 = R0 >> 16;
 114
 115  R2 = R3 - R2;         /* shifted divisor < upper 16 bits of dividend */
 116  CC &= CARRY;
 117  IF CC JUMP .Lshift_and_correct;
 118
 119  /* Fall through to the identities */
 120
 121/* METHOD 2: identities and manual calculation
 122   We are not able to use the divide primites, but may still catch some special
 123   cases.
 124*/
 125.Lidents:
 126  /* Test for common identities. Value to be returned is placed in R2. */
 127  CC = R0 == 0;        /* 0/Y => 0 */
 128  IF CC JUMP .Lreturn_r0;
 129  CC = R0 == R1;       /* X==Y => 1 */
 130  IF CC JUMP .Lreturn_ident;
 131  CC = R1 == 1;        /* X/1 => X */
 132  IF CC JUMP .Lreturn_ident;
 133
 134  R2.L = ONES R1;
 135  R2 = R2.L (Z);
 136  CC = R2 == 1;
 137  IF CC JUMP .Lpower_of_two;
 138
 139  [--SP] = (R7:5);                /* Push registers R5-R7 */
 140
 141  /* Idents don't match. Go for the full operation. */
 142
 143
 144  R6 = 2;                         /* assume we'll shift two */
 145  R3 = 1;
 146
 147  P2 = R1;
 148                                  /* If either R0 or R1 have sign set, */
 149                                  /* divide them by two, and note it's */
 150                                  /* been done. */
 151  CC = R1 < 0;
 152  R2 = R1 >> 1;
 153  IF CC R1 = R2;                  /* Possibly-shifted R1 */
 154  IF !CC R6 = R3;                 /* R1 doesn't, so at most 1 shifted */
 155
 156  P0 = 0;
 157  R3 = -R1;
 158  [--SP] = R3;
 159  R2 = R0 >> 1;
 160  R2 = R0 >> 1;
 161  CC = R0 < 0;
 162  IF CC P0 = R6;                  /* Number of values divided */
 163  IF !CC R2 = R0;                 /* Shifted R0 */
 164
 165                                  /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
 166
 167                                  /* r2 holds Copy dividend  */
 168  R3 = 0;                         /* Clear partial remainder */
 169  R7 = 0;                         /* Initialise quotient bit */
 170
 171  P1 = 32;                        /* Set loop counter */
 172  LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
 173.Lulst:  R6 = R2 >> 31;             /* R6 = sign bit of R2, for carry */
 174       R2 = R2 << 1;              /* Shift 64 bit dividend up by 1 bit */
 175       R3 = R3 << 1 || R5 = [SP];
 176       R3 = R3 | R6;              /* Include any carry */
 177       CC = R7 < 0;               /* Check quotient(AQ) */
 178                                  /* If AQ==0, we'll sub divisor */
 179       IF CC R5 = R1;             /* and if AQ==1, we'll add it. */
 180       R3 = R3 + R5;              /* Add/sub divsor to partial remainder */
 181       R7 = R3 ^ R1;              /* Generate next quotient bit */
 182
 183       R5 = R7 >> 31;             /* Get AQ */
 184       BITTGL(R5, 0);             /* Invert it, to get what we'll shift */
 185.Lulend: R2 = R2 + R5;              /* and "shift" it in. */
 186
 187  CC = P0 == 0;                   /* Check how many inputs we shifted */
 188  IF CC JUMP .Lno_mult;            /* if none... */
 189  R6 = R2 << 1;
 190  CC = P0 == 1;
 191  IF CC R2 = R6;                  /* if 1, Q = Q*2 */
 192  IF !CC R1 = P2;                 /* if 2, restore stored divisor */
 193
 194  R3 = R2;                        /* Copy of R2 */
 195  R3 *= R1;                       /* Q * divisor */
 196  R5 = R0 - R3;                   /* Z = (dividend - Q * divisor) */
 197  CC = R1 <= R5 (IU);             /* Check if divisor <= Z? */
 198  R6 = CC;                        /* if yes, R6 = 1 */
 199  R2 = R2 + R6;                   /* if yes, add one to quotient(Q) */
 200.Lno_mult:
 201  SP += 4;
 202  (R7:5) = [SP++];                /* Pop registers R5-R7 */
 203  R0 = R2;                        /* Store quotient */
 204  RTS;
 205
 206.Lreturn_ident:
 207  CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
 208  R2 = 0;
 209  IF CC JUMP .Ltrue_return_ident;
 210  R2 = -1 (X);         /* X/0 => 0xFFFFFFFF */
 211  CC = R1 == 0;
 212  IF CC JUMP .Ltrue_return_ident;
 213  R2 = -R2;            /* R2 now 1 */
 214  CC = R0 == R1;       /* X==Y => 1 */
 215  IF CC JUMP .Ltrue_return_ident;
 216  R2 = R0;             /* X/1 => X */
 217  /*FALLTHRU*/
 218
 219.Ltrue_return_ident:
 220  R0 = R2;
 221.Lreturn_r0:
 222  RTS;
 223
 224.Lpower_of_two:
 225  /* Y has a single bit set, which means it's a power of two.
 226  ** That means we can perform the division just by shifting
 227  ** X to the right the appropriate number of bits
 228  */
 229
 230  /* signbits returns the number of sign bits, minus one.
 231  ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
 232  ** to shift right n-signbits spaces. It also means 0x80000000
 233  ** is a special case, because that *also* gives a signbits of 0
 234  */
 235
 236  R2 = R0 >> 31;
 237  CC = R1 < 0;
 238  IF CC JUMP .Ltrue_return_ident;
 239
 240  R1.l = SIGNBITS R1;
 241  R1 = R1.L (Z);
 242  R1 += -30;
 243  R0 = LSHIFT R0 by R1.L;
 244  RTS;
 245
 246/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
 247  Two scaling operations are required to use the divide primitives with a
 248  divisor > 0x7FFFF.
 249  Firstly (as in method 1) we need to shift the dividend 1 to the left for
 250  integer division.
 251  Secondly we need to shift both the divisor and dividend 1 to the right so
 252  both are in range for the primitives.
 253  The left/right shift of the dividend does nothing so we can skip it.
 254*/
 255.Lshift_and_correct:
 256  R2 = R0;
 257  // R3 is already R1 >> 1
 258  CC=!CC;
 259  AQ = CC;                        /* Clear AQ, got here with CC = 0 */
 260  DIVQ(R2, R3); // 1
 261  DIVQ(R2, R3); // 2
 262  DIVQ(R2, R3); // 3
 263  DIVQ(R2, R3); // 4
 264  DIVQ(R2, R3); // 5
 265  DIVQ(R2, R3); // 6
 266  DIVQ(R2, R3); // 7
 267  DIVQ(R2, R3); // 8
 268  DIVQ(R2, R3); // 9
 269  DIVQ(R2, R3); // 10
 270  DIVQ(R2, R3); // 11
 271  DIVQ(R2, R3); // 12
 272  DIVQ(R2, R3); // 13
 273  DIVQ(R2, R3); // 14
 274  DIVQ(R2, R3); // 15
 275  DIVQ(R2, R3); // 16
 276
 277  /* According to the Instruction Set Reference:
 278     To divide by a divisor > 0x7FFF,
 279     1. prescale and perform divide to obtain quotient (Q) (done above),
 280     2. multiply quotient by unscaled divisor (result M)
 281     3. subtract the product from the divident to get an error (E = X - M)
 282     4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
 283   */
 284  R3 = R2.L (Z);                /* Q = X' / Y' */
 285  R2 = R3;              /* Preserve Q */
 286  R2 *= R1;             /* M = Q * Y */
 287  R2 = R0 - R2;         /* E = X - M */
 288  R0 = R3;              /* Copy Q into result reg */
 289
 290/* Correction: If result of the multiply is negative, we overflowed
 291   and need to correct the result by subtracting 1 from the result.*/
 292  R3 = 0xFFFF (Z);
 293  R2 = R2 >> 16;                /* E >> 16 */
 294  CC = R2 == R3;
 295  R3 = 1 ;
 296  R1 = R0 - R3;
 297  IF CC R0 = R1;
 298  RTS;
 299
 300ENDPROC(___udivsi3)
 301
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.