linux/arch/i386/lib/checksum.S
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IP/TCP/UDP checksumming routines
   7 *
   8 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
   9 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  10 *              Tom May, <ftom@netcom.com>
  11 *              Pentium Pro/II routines:
  12 *              Alexander Kjeldaas <astor@guardian.no>
  13 *              Finn Arne Gangstad <finnag@guardian.no>
  14 *              Lots of code moved from tcp.c and ip.c; see those files
  15 *              for more names.
  16 *
  17 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  18 *                           handling.
  19 *              Andi Kleen,  add zeroing on error
  20 *                   converted to pure assembler
  21 *
  22 *              This program is free software; you can redistribute it and/or
  23 *              modify it under the terms of the GNU General Public License
  24 *              as published by the Free Software Foundation; either version
  25 *              2 of the License, or (at your option) any later version.
  26 */
  27
  28#include <linux/linkage.h>
  29#include <asm/dwarf2.h>
  30#include <asm/errno.h>
  31                                
  32/*
  33 * computes a partial checksum, e.g. for TCP/UDP fragments
  34 */
  35
  36/*      
  37unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
  38 */
  39                
  40.text
  41                
  42#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
  43
  44          /*            
  45           * Experiments with Ethernet and SLIP connections show that buff
  46           * is aligned on either a 2-byte or 4-byte boundary.  We get at
  47           * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  48           * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  49           * alignment for the unrolled loop.
  50           */           
  51ENTRY(csum_partial)
    /*
     * unsigned int csum_partial(const unsigned char *buff, int len,
     *                           unsigned int sum)
     *
     * 486/Pentium variant.  Adds len bytes starting at buff into the
     * 32-bit running sum, folding every carry back in, and returns the
     * new sum in %eax.
     *
     * Stack after the two pushes below:
     *   12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum
     *
     * Register roles:
     *   %eax  running sum (return value)
     *   %ecx  bytes remaining, then 32-byte block count, then tail scratch
     *   %edx  copy of the byte count taken before the unrolled loop
     *   %esi  read pointer
     *   %ebx  scratch for loaded data
     * %esi and %ebx are saved and restored here; %ecx and %edx are not.
     *
     * A buff that starts on an odd address is handled by summing one
     * byte, rotating the sum left by 8, summing the rest from the now
     * even address, and rotating left by 8 once more before returning.
     */
  52        CFI_STARTPROC
  53        pushl %esi
  54        CFI_ADJUST_CFA_OFFSET 4
  55        CFI_REL_OFFSET esi, 0
  56        pushl %ebx
  57        CFI_ADJUST_CFA_OFFSET 4
  58        CFI_REL_OFFSET ebx, 0
  59        movl 20(%esp),%eax      # Function arg: unsigned int sum
  60        movl 16(%esp),%ecx      # Function arg: int len
  61        movl 12(%esp),%esi      # Function arg: unsigned char *buff
  62        testl $3, %esi          # Check alignment.
  63        jz 2f                   # Jump if alignment is ok.
  64        testl $1, %esi          # Check alignment.
  65        jz 10f                  # Jump if alignment is boundary of 2bytes.
  66
  67        # buf is odd
  68        dec %ecx
  69        jl 8f                   # len <= 0: return sum unchanged
  70        movzbl (%esi), %ebx
  71        addl %ebx, %eax
    /*
     * Fold the carry of the byte add back into the sum right away.
     * The roll below rewrites CF, so a carry left pending here would be
     * lost when the incoming sum is 0xffffff01 or larger.  This is the
     * same addl / adcl $0 pair the PentiumII/PPro variant uses.
     */
            adcl $0, %eax
  72        roll $8, %eax
  73        inc %esi
  74        testl $2, %esi          # still only 2-byte aligned?
  75        jz 2f                   # no: esi is 4-byte aligned now
  7610:
  77        subl $2, %ecx           # Alignment uses up two bytes.
  78        jae 1f                  # Jump if we had at least two bytes.
  79        addl $2, %ecx           # ecx was < 2.  Deal with it.
  80        jmp 4f
  811:      movw (%esi), %bx
  82        addl $2, %esi
  83        addw %bx, %ax
  84        adcl $0, %eax           # fold the carry of the 16-bit add
  852:
  86        movl %ecx, %edx         # keep the byte count for the tail code
  87        shrl $5, %ecx           # ecx = number of 32-byte blocks
  88        jz 2f
  89        testl %esi, %esi        # clears CF ahead of the adcl chain
  901:      movl (%esi), %ebx
  91        adcl %ebx, %eax
  92        movl 4(%esi), %ebx
  93        adcl %ebx, %eax
  94        movl 8(%esi), %ebx
  95        adcl %ebx, %eax
  96        movl 12(%esi), %ebx
  97        adcl %ebx, %eax
  98        movl 16(%esi), %ebx
  99        adcl %ebx, %eax
 100        movl 20(%esi), %ebx
 101        adcl %ebx, %eax
 102        movl 24(%esi), %ebx
 103        adcl %ebx, %eax
 104        movl 28(%esi), %ebx
 105        adcl %ebx, %eax
 106        lea 32(%esi), %esi      # lea and dec leave CF alone, so the
 107        dec %ecx                # carry runs on into the next pass
 108        jne 1b
 109        adcl $0, %eax           # fold the last carry of the loop
 1102:      movl %edx, %ecx
 111        andl $0x1c, %edx        # bytes in whole dwords below 32
 112        je 4f
 113        shrl $2, %edx           # This clears CF
 1143:      adcl (%esi), %eax
 115        lea 4(%esi), %esi
 116        dec %edx
 117        jne 3b
 118        adcl $0, %eax
 1194:      andl $3, %ecx           # 0..3 trailing bytes
 120        jz 7f
 121        cmpl $2, %ecx
 122        jb 5f                   # exactly one byte left
 123        movw (%esi),%cx         # mov and lea keep the cmpl flags
 124        leal 2(%esi),%esi
 125        je 6f                   # exactly two bytes left
 126        shll $16,%ecx           # three left: word goes to the high half
 1275:      movb (%esi),%cl
 1286:      addl %ecx,%eax
 129        adcl $0, %eax 
 1307:      
 131        testl $1, 12(%esp)      # was the original buff odd?
 132        jz 8f
 133        roll $8, %eax           # second 8-bit rotate for the odd case
 1348:
 135        popl %ebx
 136        CFI_ADJUST_CFA_OFFSET -4
 137        CFI_RESTORE ebx
 138        popl %esi
 139        CFI_ADJUST_CFA_OFFSET -4
 140        CFI_RESTORE esi
 141        ret
 142        CFI_ENDPROC
 143ENDPROC(csum_partial)
 144
 145#else
 146
 147/* Version for PentiumII/PPro */
 148
 149ENTRY(csum_partial)
    /*
     * unsigned int csum_partial(const unsigned char *buf, int len,
     *                           unsigned int sum)
     *
     * PentiumII/PPro variant.  Stack after the two pushes below:
     *   12(%esp) = buf, 16(%esp) = len, 20(%esp) = sum
     * The result is returned in %eax.  %esi and %ebx are saved and
     * restored; %ecx and %edx are used as scratch.
     *
     * The bulk of the work is one chain of 32 add-with-carry
     * instructions covering 128 bytes (labels 40 to 45).  The first pass
     * enters the chain part-way through by a computed jump, so that it
     * consumes the (len & 0x7c) bytes that do not fill a whole block;
     * each later pass runs the whole chain.  The final len & 3 bytes are
     * masked out of a single 32-bit load.
     */
 150        CFI_STARTPROC
 151        pushl %esi
 152        CFI_ADJUST_CFA_OFFSET 4
 153        CFI_REL_OFFSET esi, 0
 154        pushl %ebx
 155        CFI_ADJUST_CFA_OFFSET 4
 156        CFI_REL_OFFSET ebx, 0
 157        movl 20(%esp),%eax      # Function arg: unsigned int sum
 158        movl 16(%esp),%ecx      # Function arg: int len
 159        movl 12(%esp),%esi      # Function arg: const unsigned char *buf
 160
 161        testl $3, %esi          # 4-byte aligned?
 162        jnz 25f                 # no: consume 1 and/or 2 bytes first
 16310:
 164        movl %ecx, %edx         # keep len for the 1-3 byte tail
 165        movl %ecx, %ebx
 166        andl $0x7c, %ebx        # ebx = bytes of the partial first pass
 167        shrl $7, %ecx           # ecx = number of full 128-byte passes
 168        addl %ebx,%esi          # chain operands are negative offsets
 169        shrl $2, %ebx           # ebx = dwords in the partial pass
 170        negl %ebx
    /*
     * Entry point = 45f - 3 * dwords.  The scale of 3 assumes every
     * adcl in the chain assembles to 3 bytes.
     */
 171        lea 45f(%ebx,%ebx,2), %ebx
 172        testl %esi, %esi        # clears CF before entering the chain
 173        jmp *%ebx
 174
 175        # Handle 2-byte-aligned regions
 17620:     addw (%esi), %ax
 177        lea 2(%esi), %esi
 178        adcl $0, %eax
 179        jmp 10b
 18025:
 181        testl $1, %esi         
 182        jz 30f                 
 183        # buf is odd
 184        dec %ecx
 185        jl 90f                  # len <= 0: return sum unchanged
 186        movzbl (%esi), %ebx
 187        addl %ebx, %eax
 188        adcl $0, %eax
 189        roll $8, %eax           # rotated again at label 80 before return
 190        inc %esi
 191        testl $2, %esi
 192        jz 10b
 193
 19430:     subl $2, %ecx           # esi is 2-byte but not 4-byte aligned
 195        ja 20b                  # more than 2 bytes: sum a word, go on
 196        je 32f                  # exactly 2 bytes
 197        addl $2, %ecx
 198        jz 80f                  # nothing left
 199        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
 200        addl %ebx, %eax
 201        adcl $0, %eax
 202        jmp 80f
 20332:
 204        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
 205        adcl $0, %eax
 206        jmp 80f
 207
    /* Full 128-byte pass; addl starts a fresh carry chain. */
 20840: 
 209        addl -128(%esi), %eax
 210        adcl -124(%esi), %eax
 211        adcl -120(%esi), %eax
 212        adcl -116(%esi), %eax   
 213        adcl -112(%esi), %eax   
 214        adcl -108(%esi), %eax
 215        adcl -104(%esi), %eax
 216        adcl -100(%esi), %eax
 217        adcl -96(%esi), %eax
 218        adcl -92(%esi), %eax
 219        adcl -88(%esi), %eax
 220        adcl -84(%esi), %eax
 221        adcl -80(%esi), %eax
 222        adcl -76(%esi), %eax
 223        adcl -72(%esi), %eax
 224        adcl -68(%esi), %eax
 225        adcl -64(%esi), %eax     
 226        adcl -60(%esi), %eax     
 227        adcl -56(%esi), %eax     
 228        adcl -52(%esi), %eax   
 229        adcl -48(%esi), %eax   
 230        adcl -44(%esi), %eax
 231        adcl -40(%esi), %eax
 232        adcl -36(%esi), %eax
 233        adcl -32(%esi), %eax
 234        adcl -28(%esi), %eax
 235        adcl -24(%esi), %eax
 236        adcl -20(%esi), %eax
 237        adcl -16(%esi), %eax
 238        adcl -12(%esi), %eax
 239        adcl -8(%esi), %eax
 240        adcl -4(%esi), %eax
 24145:
 242        lea 128(%esi), %esi     # lea keeps CF for the adcl below
 243        adcl $0, %eax           # fold the carry of this pass
 244        dec %ecx
 245        jge 40b                 # run the full chain for each whole block
 246        movl %edx, %ecx
 24750:     andl $3, %ecx
 248        jz 80f
 249
 250        # Handle the last 1-3 bytes without jumping
 251        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
 252        movl $0xffffff,%ebx     # by the shll and shrl instructions
 253        shll $3,%ecx
 254        shrl %cl,%ebx           # mask: 0xff, 0xffff or 0xffffff
 255        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
 256        addl %ebx,%eax
 257        adcl $0,%eax
 25880: 
 259        testl $1, 12(%esp)      # was the original buf odd?
 260        jz 90f
 261        roll $8, %eax           # second 8-bit rotate for the odd case
 26290: 
 263        popl %ebx
 264        CFI_ADJUST_CFA_OFFSET -4
 265        CFI_RESTORE ebx
 266        popl %esi
 267        CFI_ADJUST_CFA_OFFSET -4
 268        CFI_RESTORE esi
 269        ret
 270        CFI_ENDPROC
 271ENDPROC(csum_partial)
 272                                
 273#endif
 274
 275/*
 276unsigned int csum_partial_copy_generic (const char *src, char *dst,
 277                                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 278 */ 
 279
 280/*
 281 * Copy from ds while checksumming, otherwise like csum_partial
 282 *
 283 * The macros SRC and DST specify the type of access for the instruction.
 284 * thus we can call a custom exception handler for all access types.
 285 *
 286 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 287 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 288 *        them all but there's no guarantee.
 289 */
 290
 291#define SRC(y...)                       \
 292        9999: y;                        \
 293        .section __ex_table, "a";       \
 294        .long 9999b, 6001f      ;       \
 295        .previous
    /*
     * SRC(insn): emit insn at local label 9999 and add the pair
     * (address of insn, address of the next label 6001) to the
     * __ex_table section.  Label 6001 is the source-fault fixup that
     * each csum_partial_copy_generic variant defines after its body.
     */
 296
 297#define DST(y...)                       \
 298        9999: y;                        \
 299        .section __ex_table, "a";       \
 300        .long 9999b, 6002f      ;       \
 301        .previous
    /*
     * DST(insn): same as SRC, but the pair points at the next label
     * 6002, the destination-fault fixup.
     */
 302
 303#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 304
 305#define ARGBASE 16              
 306#define FP              12
    /*
     * ARGBASE is the distance from %esp to the return address once the
     * prologue below has run (one 4-byte scratch slot plus three pushed
     * registers), so the n-th stack argument is at ARGBASE+4*n(%esp).
     * FP is the offset of the scratch slot reserved by subl $4,%esp; it
     * holds the byte count while %ecx is used as a loop counter.
     */
 307                
 308ENTRY(csum_partial_copy_generic)
    /*
     * unsigned int csum_partial_copy_generic(const char *src, char *dst,
     *                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
     *
     * 486/Pentium variant.  Copies len bytes from src to dst while adding
     * them into sum; the result is returned in %eax.
     *
     * Register roles:
     *   %eax  running sum        %esi  source pointer
     *   %ecx  count / tail data  %edi  destination pointer
     *   %ebx, %edx  data in flight
     * %edi, %esi and %ebx are saved and restored.
     *
     * Every load is wrapped in SRC() and every store in DST().  A fault
     * on a load runs the fixup at 6001: -EFAULT is stored through
     * src_err_ptr, len bytes at dst are zeroed and the sum becomes 0.
     * A fault on a store runs the fixup at 6002: -EFAULT is stored
     * through dst_err_ptr.  Both fixups then jump to the epilogue.
     */
 309        CFI_STARTPROC
 310        subl  $4,%esp   
 311        CFI_ADJUST_CFA_OFFSET 4
 312        pushl %edi
 313        CFI_ADJUST_CFA_OFFSET 4
 314        CFI_REL_OFFSET edi, 0
 315        pushl %esi
 316        CFI_ADJUST_CFA_OFFSET 4
 317        CFI_REL_OFFSET esi, 0
 318        pushl %ebx
 319        CFI_ADJUST_CFA_OFFSET 4
 320        CFI_REL_OFFSET ebx, 0
 321        movl ARGBASE+16(%esp),%eax      # sum
 322        movl ARGBASE+12(%esp),%ecx      # len
 323        movl ARGBASE+4(%esp),%esi       # src
 324        movl ARGBASE+8(%esp),%edi       # dst
 325
 326        testl $2, %edi                  # Check alignment. 
 327        jz 2f                           # Jump if alignment is ok.
 328        subl $2, %ecx                   # Alignment uses up two bytes.
 329        jae 1f                          # Jump if we had at least two bytes.
 330        addl $2, %ecx                   # ecx was < 2.  Deal with it.
 331        jmp 4f
 332SRC(1:  movw (%esi), %bx        )
 333        addl $2, %esi
 334DST(    movw %bx, (%edi)        )
 335        addl $2, %edi
 336        addw %bx, %ax   
 337        adcl $0, %eax
 3382:
 339        movl %ecx, FP(%esp)             # save the byte count for later
 340        shrl $5, %ecx                   # ecx = number of 32-byte blocks
 341        jz 2f
 342        testl %esi, %esi                # clears CF before the adcl chain
    /*
     * 32 bytes per pass: two loads, then add and store each dword.  The
     * carry runs through all eight adcl and across passes, since the
     * mov, lea and dec in between leave CF alone.
     */
 343SRC(1:  movl (%esi), %ebx       )
 344SRC(    movl 4(%esi), %edx      )
 345        adcl %ebx, %eax
 346DST(    movl %ebx, (%edi)       )
 347        adcl %edx, %eax
 348DST(    movl %edx, 4(%edi)      )
 349
 350SRC(    movl 8(%esi), %ebx      )
 351SRC(    movl 12(%esi), %edx     )
 352        adcl %ebx, %eax
 353DST(    movl %ebx, 8(%edi)      )
 354        adcl %edx, %eax
 355DST(    movl %edx, 12(%edi)     )
 356
 357SRC(    movl 16(%esi), %ebx     )
 358SRC(    movl 20(%esi), %edx     )
 359        adcl %ebx, %eax
 360DST(    movl %ebx, 16(%edi)     )
 361        adcl %edx, %eax
 362DST(    movl %edx, 20(%edi)     )
 363
 364SRC(    movl 24(%esi), %ebx     )
 365SRC(    movl 28(%esi), %edx     )
 366        adcl %ebx, %eax
 367DST(    movl %ebx, 24(%edi)     )
 368        adcl %edx, %eax
 369DST(    movl %edx, 28(%edi)     )
 370
 371        lea 32(%esi), %esi
 372        lea 32(%edi), %edi
 373        dec %ecx
 374        jne 1b
 375        adcl $0, %eax                   # fold the last carry of the loop
 3762:      movl FP(%esp), %edx             # reload the saved byte count
 377        movl %edx, %ecx
 378        andl $0x1c, %edx                # bytes in whole dwords below 32
 379        je 4f
 380        shrl $2, %edx                   # This clears CF
 381SRC(3:  movl (%esi), %ebx       )
 382        adcl %ebx, %eax
 383DST(    movl %ebx, (%edi)       )
 384        lea 4(%esi), %esi
 385        lea 4(%edi), %edi
 386        dec %edx
 387        jne 3b
 388        adcl $0, %eax
 3894:      andl $3, %ecx                   # 0..3 trailing bytes
 390        jz 7f
 391        cmpl $2, %ecx
 392        jb 5f                           # exactly one byte left
    /* mov and lea keep the cmpl flags alive for the je below. */
 393SRC(    movw (%esi), %cx        )
 394        leal 2(%esi), %esi
 395DST(    movw %cx, (%edi)        )
 396        leal 2(%edi), %edi
 397        je 6f                           # exactly two bytes left
 398        shll $16,%ecx                   # three left: word to the high half
 399SRC(5:  movb (%esi), %cl        )
 400DST(    movb %cl, (%edi)        )
 4016:      addl %ecx, %eax
 402        adcl $0, %eax
 4037:
    /*
     * 5000 is where both fixups resume.  The fixup code below is placed
     * in the .fixup section, so in .text this label is followed
     * directly by the epilogue after .previous.
     */
 4045000:
 405
 406# Exception handler:
 407.section .fixup, "ax"                                                   
 408
 4096001:
 410        movl ARGBASE+20(%esp), %ebx     # src_err_ptr
 411        movl $-EFAULT, (%ebx)
 412
 413        # zero the complete destination - computing the rest
 414        # is too much work 
 415        movl ARGBASE+8(%esp), %edi      # dst
 416        movl ARGBASE+12(%esp), %ecx     # len
 417        xorl %eax,%eax                  # fill byte; also the returned sum
 418        rep ; stosb
 419
 420        jmp 5000b
 421
 4226002:
 423        movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
 424        movl $-EFAULT,(%ebx)
 425        jmp 5000b
 426
 427.previous
 428
 429        popl %ebx
 430        CFI_ADJUST_CFA_OFFSET -4
 431        CFI_RESTORE ebx
 432        popl %esi
 433        CFI_ADJUST_CFA_OFFSET -4
 434        CFI_RESTORE esi
 435        popl %edi
 436        CFI_ADJUST_CFA_OFFSET -4
 437        CFI_RESTORE edi
 438        popl %ecx                       # equivalent to addl $4,%esp
 439        CFI_ADJUST_CFA_OFFSET -4
 440        ret     
 441        CFI_ENDPROC
 442ENDPROC(csum_partial_copy_generic)
 443
 444#else
 445
 446/* Version for PentiumII/PPro */
 447
 448#define ROUND1(x) \
 449        SRC(movl x(%esi), %ebx  )       ;       \
 450        addl %ebx, %eax                 ;       \
 451        DST(movl %ebx, x(%edi)  )       ; 
    /*
     * ROUND1(x): load the dword at x(%esi), add it to %eax with addl
     * (the incoming carry flag is ignored) and store it at x(%edi).
     */
 452
 453#define ROUND(x) \
 454        SRC(movl x(%esi), %ebx  )       ;       \
 455        adcl %ebx, %eax                 ;       \
 456        DST(movl %ebx, x(%edi)  )       ;
    /*
     * ROUND(x): same as ROUND1 but adds with adcl, so the carry of the
     * preceding round is included.
     */
 457
 458#define ARGBASE 12
    /*
     * ARGBASE is the distance from %esp to the return address after the
     * three pushes below; the n-th stack argument is at
     * ARGBASE+4*n(%esp).
     */
 459                
 460ENTRY(csum_partial_copy_generic)
    /*
     * unsigned int csum_partial_copy_generic(const char *src, char *dst,
     *                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
     *
     * PentiumII/PPro variant.  Copies len bytes from src to dst while
     * adding them into sum; the result is returned in %eax.
     *
     * The main loop is 16 ROUNDs (64 bytes).  The first pass enters the
     * sequence part-way through by a computed jump so that it handles
     * the (len & 0x3c) bytes that do not fill a whole 64-byte block;
     * each later pass runs all 16 rounds.  The last len & 3 bytes are
     * handled separately at label 4.
     *
     * Faults on SRC() loads go to 6001, faults on DST() stores to 6002;
     * see the fixup code near the end.
     */
 461        CFI_STARTPROC
 462        pushl %ebx
 463        CFI_ADJUST_CFA_OFFSET 4
 464        CFI_REL_OFFSET ebx, 0
 465        pushl %edi
 466        CFI_ADJUST_CFA_OFFSET 4
 467        CFI_REL_OFFSET edi, 0
 468        pushl %esi
 469        CFI_ADJUST_CFA_OFFSET 4
 470        CFI_REL_OFFSET esi, 0
 471        movl ARGBASE+4(%esp),%esi       #src
 472        movl ARGBASE+8(%esp),%edi       #dst    
 473        movl ARGBASE+12(%esp),%ecx      #len
 474        movl ARGBASE+16(%esp),%eax      #sum
 475#       movl %ecx, %edx  
 476        movl %ecx, %ebx  
 477        movl %esi, %edx         # overwritten by the lea below before use
 478        shrl $6, %ecx           # ecx = number of full 64-byte passes
 479        andl $0x3c, %ebx        # ebx = bytes of the partial first pass
 480        negl %ebx
 481        subl %ebx, %esi         # ebx is negative: advances both pointers
 482        subl %ebx, %edi         # past the partial pass
 483        lea  -1(%esi),%edx
 484        andl $-32,%edx          # edx = (esi - 1) rounded down to 32
    /*
     * Entry point = 3f - 2 * partial bytes = 3f - 8 * dwords.  This
     * assumes each ROUND expands to 8 bytes of code.
     */
 485        lea 3f(%ebx,%ebx), %ebx
 486        testl %esi, %esi        # clears CF before entering the rounds
 487        jmp *%ebx
 4881:      addl $64,%esi
 489        addl $64,%edi 
    /*
     * Two byte loads through %edx whose result is discarded (%bl is
     * overwritten by ROUND1).  NOTE(review): presumably these touch
     * source cache lines ahead of the rounds - confirm.
     */
 490        SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
 491        ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
 492        ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
 493        ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
 494        ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
 4953:      adcl $0,%eax            # fold the carry of this pass
 496        addl $64, %edx
 497        dec %ecx
 498        jge 1b                  # run all 16 rounds for each whole block
 4994:      movl ARGBASE+12(%esp),%edx      #len
 500        andl $3, %edx           # 0..3 trailing bytes
 501        jz 7f
 502        cmpl $2, %edx
 503        jb 5f                   # exactly one byte left
    /* mov and lea keep the cmpl flags alive for the je below. */
 504SRC(    movw (%esi), %dx         )
 505        leal 2(%esi), %esi
 506DST(    movw %dx, (%edi)         )
 507        leal 2(%edi), %edi
 508        je 6f                   # exactly two bytes left
 509        shll $16,%edx           # three left: word goes to the high half
 5105:
 511SRC(    movb (%esi), %dl         )
 512DST(    movb %dl, (%edi)         )
 5136:      addl %edx, %eax
 514        adcl $0, %eax
 5157:
    /*
     * The fixup code below is placed in the .fixup section, so in .text
     * label 7 is followed directly by the epilogue after .previous.
     * Both fixups resume at label 7.
     */
 516.section .fixup, "ax"
 5176001:   movl    ARGBASE+20(%esp), %ebx  # src_err_ptr   
 518        movl $-EFAULT, (%ebx)
 519        # zero the complete destination (computing the rest is too much work)
 520        movl ARGBASE+8(%esp),%edi       # dst
 521        movl ARGBASE+12(%esp),%ecx      # len
 522        xorl %eax,%eax          # fill byte; also the returned sum
 523        rep; stosb
 524        jmp 7b
 5256002:   movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
 526        movl $-EFAULT, (%ebx)
 527        jmp  7b                 
 528.previous                               
 529
 530        popl %esi
 531        CFI_ADJUST_CFA_OFFSET -4
 532        CFI_RESTORE esi
 533        popl %edi
 534        CFI_ADJUST_CFA_OFFSET -4
 535        CFI_RESTORE edi
 536        popl %ebx
 537        CFI_ADJUST_CFA_OFFSET -4
 538        CFI_RESTORE ebx
 539        ret
 540        CFI_ENDPROC
 541ENDPROC(csum_partial_copy_generic)
 542                                
    /* ROUND and ROUND1 are only meant for the function above. */
 543#undef ROUND
 544#undef ROUND1           
 545                
 546#endif
 547
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.