linux/arch/x86/kernel/entry_32.S
   1/*
   2 *
   3 *  Copyright (C) 1991, 1992  Linus Torvalds
   4 */
   5
   6/*
   7 * entry.S contains the system-call and fault low-level handling routines.
   8 * This also contains the timer-interrupt handler, as well as all interrupts
   9 * and faults that can result in a task-switch.
  10 *
  11 * NOTE: This code handles signal-recognition, which happens every time
  12 * after a timer-interrupt and after each system call.
  13 *
  14 * I changed all the .align's to 4 (16 byte alignment), as that's faster
  15 * on a 486.
  16 *
  17 * Stack layout in 'syscall_exit':
  18 *      ptrace needs to have all regs on the stack.
   19 *      If the order here is changed, it needs to be
  20 *      updated in fork.c:copy_process, signal.c:do_signal,
  21 *      ptrace.c and ptrace.h
  22 *
  23 *       0(%esp) - %ebx
  24 *       4(%esp) - %ecx
  25 *       8(%esp) - %edx
  26 *       C(%esp) - %esi
  27 *      10(%esp) - %edi
  28 *      14(%esp) - %ebp
  29 *      18(%esp) - %eax
  30 *      1C(%esp) - %ds
  31 *      20(%esp) - %es
  32 *      24(%esp) - %fs
  33 *      28(%esp) - %gs          saved iff !CONFIG_X86_32_LAZY_GS
  34 *      2C(%esp) - orig_eax
  35 *      30(%esp) - %eip
  36 *      34(%esp) - %cs
  37 *      38(%esp) - %eflags
  38 *      3C(%esp) - %oldesp
  39 *      40(%esp) - %oldss
  40 *
  41 * "current" is in register %ebx during any slow entries.
  42 */
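/*
 * Illustrative sketch, read straight off the layout above: the PT_*
 * offsets used throughout this file name these same slots.  They are
 * generated from struct pt_regs by the asm-offsets machinery rather
 * than hard-coded here, so treat the numbers as a mnemonic, not as a
 * reference:
 *
 *      PT_EBX   = 0x00     PT_EAX      = 0x18     PT_EIP    = 0x30
 *      PT_ECX   = 0x04     PT_DS       = 0x1C     PT_CS     = 0x34
 *      PT_EDX   = 0x08     PT_ES       = 0x20     PT_EFLAGS = 0x38
 *      PT_ESI   = 0x0C     PT_FS       = 0x24     PT_OLDESP = 0x3C
 *      PT_EDI   = 0x10     PT_GS       = 0x28     PT_OLDSS  = 0x40
 *      PT_EBP   = 0x14     PT_ORIG_EAX = 0x2C
 */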
  43
  44#include <linux/linkage.h>
  45#include <asm/thread_info.h>
  46#include <asm/irqflags.h>
  47#include <asm/errno.h>
  48#include <asm/segment.h>
  49#include <asm/smp.h>
  50#include <asm/page_types.h>
  51#include <asm/percpu.h>
  52#include <asm/dwarf2.h>
  53#include <asm/processor-flags.h>
  54#include <asm/ftrace.h>
  55#include <asm/irq_vectors.h>
  56
  57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  58#include <linux/elf-em.h>
  59#define AUDIT_ARCH_I386         (EM_386|__AUDIT_ARCH_LE)
  60#define __AUDIT_ARCH_LE    0x40000000
  61
  62#ifndef CONFIG_AUDITSYSCALL
  63#define sysenter_audit  syscall_trace_entry
  64#define sysexit_audit   syscall_exit_work
  65#endif
  66
  67/*
  68 * We use macros for low-level operations which need to be overridden
  69 * for paravirtualization.  The following will never clobber any registers:
  70 *   INTERRUPT_RETURN (aka. "iret")
  71 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  72 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  73 *
  74 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  75 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  76 * Allowing a register to be clobbered can shrink the paravirt replacement
  77 * enough to patch inline, increasing performance.
  78 */
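/*
 * Illustrative expansion only, assuming a native (!CONFIG_PARAVIRT)
 * build: the macros above then reduce to the plain instructions already
 * named in their "aka" comments, roughly
 *
 *      DISABLE_INTERRUPTS(clobbers)    ->  cli
 *      ENABLE_INTERRUPTS(clobbers)     ->  sti
 *      INTERRUPT_RETURN                ->  iret
 *      ENABLE_INTERRUPTS_SYSEXIT       ->  sti; sysexit
 *
 * With CONFIG_PARAVIRT they become patchable call sites instead, which
 * is why declaring clobberable registers matters for inline patching.
 */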
  79
  80#define nr_syscalls ((syscall_table_size)/4)
  81
  82#ifdef CONFIG_PREEMPT
  83#define preempt_stop(clobbers)  DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
  84#else
  85#define preempt_stop(clobbers)
  86#define resume_kernel           restore_all
  87#endif
  88
  89.macro TRACE_IRQS_IRET
  90#ifdef CONFIG_TRACE_IRQFLAGS
  91        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
  92        jz 1f
  93        TRACE_IRQS_ON
  941:
  95#endif
  96.endm
  97
  98#ifdef CONFIG_VM86
  99#define resume_userspace_sig    check_userspace
 100#else
 101#define resume_userspace_sig    resume_userspace
 102#endif
 103
 104/*
 105 * User gs save/restore
 106 *
  107 * %gs is used for userland TLS, and the kernel only uses it for the stack
  108 * canary, which gcc requires to be at %gs:20.  Read the comment
 109 * at the top of stackprotector.h for more info.
 110 *
 111 * Local labels 98 and 99 are used.
 112 */
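/*
 * Illustrative sketch of why %gs:20 matters (not code from this file;
 * the exact sequence depends on the compiler): a C function built with
 * -fstack-protector loads the canary from %gs:20 in its prologue and
 * re-checks it in its epilogue, along the lines of
 *
 *      movl %gs:20, %eax
 *      movl %eax, -12(%ebp)            # stash canary in this frame
 *      ...
 *      movl -12(%ebp), %eax
 *      xorl %gs:20, %eax               # canary still intact?
 *      jne  .Lfail                     # .Lfail calls __stack_chk_fail
 *
 * so %gs must point at a segment with the canary at offset 20 whenever
 * such code can run.
 */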
 113#ifdef CONFIG_X86_32_LAZY_GS
 114
  115 /* unfortunately push/pop can't be no-ops */
 116.macro PUSH_GS
 117        pushl $0
 118        CFI_ADJUST_CFA_OFFSET 4
 119.endm
 120.macro POP_GS pop=0
 121        addl $(4 + \pop), %esp
 122        CFI_ADJUST_CFA_OFFSET -(4 + \pop)
 123.endm
 124.macro POP_GS_EX
 125.endm
 126
  127 /* all the rest are no-ops */
 128.macro PTGS_TO_GS
 129.endm
 130.macro PTGS_TO_GS_EX
 131.endm
 132.macro GS_TO_REG reg
 133.endm
 134.macro REG_TO_PTGS reg
 135.endm
 136.macro SET_KERNEL_GS reg
 137.endm
 138
 139#else   /* CONFIG_X86_32_LAZY_GS */
 140
 141.macro PUSH_GS
 142        pushl %gs
 143        CFI_ADJUST_CFA_OFFSET 4
 144        /*CFI_REL_OFFSET gs, 0*/
 145.endm
 146
 147.macro POP_GS pop=0
 14898:     popl %gs
 149        CFI_ADJUST_CFA_OFFSET -4
 150        /*CFI_RESTORE gs*/
 151  .if \pop <> 0
 152        add $\pop, %esp
 153        CFI_ADJUST_CFA_OFFSET -\pop
 154  .endif
 155.endm
 156.macro POP_GS_EX
 157.pushsection .fixup, "ax"
 15899:     movl $0, (%esp)
 159        jmp 98b
 160.section __ex_table, "a"
 161        .align 4
 162        .long 98b, 99b
 163.popsection
 164.endm
 165
 166.macro PTGS_TO_GS
 16798:     mov PT_GS(%esp), %gs
 168.endm
 169.macro PTGS_TO_GS_EX
 170.pushsection .fixup, "ax"
 17199:     movl $0, PT_GS(%esp)
 172        jmp 98b
 173.section __ex_table, "a"
 174        .align 4
 175        .long 98b, 99b
 176.popsection
 177.endm
 178
 179.macro GS_TO_REG reg
 180        movl %gs, \reg
 181        /*CFI_REGISTER gs, \reg*/
 182.endm
 183.macro REG_TO_PTGS reg
 184        movl \reg, PT_GS(%esp)
 185        /*CFI_REL_OFFSET gs, PT_GS*/
 186.endm
 187.macro SET_KERNEL_GS reg
 188        movl $(__KERNEL_STACK_CANARY), \reg
 189        movl \reg, %gs
 190.endm
 191
 192#endif  /* CONFIG_X86_32_LAZY_GS */
 193
 194.macro SAVE_ALL
 195        cld
 196        PUSH_GS
 197        pushl %fs
 198        CFI_ADJUST_CFA_OFFSET 4
 199        /*CFI_REL_OFFSET fs, 0;*/
 200        pushl %es
 201        CFI_ADJUST_CFA_OFFSET 4
 202        /*CFI_REL_OFFSET es, 0;*/
 203        pushl %ds
 204        CFI_ADJUST_CFA_OFFSET 4
 205        /*CFI_REL_OFFSET ds, 0;*/
 206        pushl %eax
 207        CFI_ADJUST_CFA_OFFSET 4
 208        CFI_REL_OFFSET eax, 0
 209        pushl %ebp
 210        CFI_ADJUST_CFA_OFFSET 4
 211        CFI_REL_OFFSET ebp, 0
 212        pushl %edi
 213        CFI_ADJUST_CFA_OFFSET 4
 214        CFI_REL_OFFSET edi, 0
 215        pushl %esi
 216        CFI_ADJUST_CFA_OFFSET 4
 217        CFI_REL_OFFSET esi, 0
 218        pushl %edx
 219        CFI_ADJUST_CFA_OFFSET 4
 220        CFI_REL_OFFSET edx, 0
 221        pushl %ecx
 222        CFI_ADJUST_CFA_OFFSET 4
 223        CFI_REL_OFFSET ecx, 0
 224        pushl %ebx
 225        CFI_ADJUST_CFA_OFFSET 4
 226        CFI_REL_OFFSET ebx, 0
 227        movl $(__USER_DS), %edx
 228        movl %edx, %ds
 229        movl %edx, %es
 230        movl $(__KERNEL_PERCPU), %edx
 231        movl %edx, %fs
 232        SET_KERNEL_GS %edx
 233.endm
 234
 235.macro RESTORE_INT_REGS
 236        popl %ebx
 237        CFI_ADJUST_CFA_OFFSET -4
 238        CFI_RESTORE ebx
 239        popl %ecx
 240        CFI_ADJUST_CFA_OFFSET -4
 241        CFI_RESTORE ecx
 242        popl %edx
 243        CFI_ADJUST_CFA_OFFSET -4
 244        CFI_RESTORE edx
 245        popl %esi
 246        CFI_ADJUST_CFA_OFFSET -4
 247        CFI_RESTORE esi
 248        popl %edi
 249        CFI_ADJUST_CFA_OFFSET -4
 250        CFI_RESTORE edi
 251        popl %ebp
 252        CFI_ADJUST_CFA_OFFSET -4
 253        CFI_RESTORE ebp
 254        popl %eax
 255        CFI_ADJUST_CFA_OFFSET -4
 256        CFI_RESTORE eax
 257.endm
 258
 259.macro RESTORE_REGS pop=0
 260        RESTORE_INT_REGS
 2611:      popl %ds
 262        CFI_ADJUST_CFA_OFFSET -4
 263        /*CFI_RESTORE ds;*/
 2642:      popl %es
 265        CFI_ADJUST_CFA_OFFSET -4
 266        /*CFI_RESTORE es;*/
 2673:      popl %fs
 268        CFI_ADJUST_CFA_OFFSET -4
 269        /*CFI_RESTORE fs;*/
 270        POP_GS \pop
 271.pushsection .fixup, "ax"
 2724:      movl $0, (%esp)
 273        jmp 1b
 2745:      movl $0, (%esp)
 275        jmp 2b
 2766:      movl $0, (%esp)
 277        jmp 3b
 278.section __ex_table, "a"
 279        .align 4
 280        .long 1b, 4b
 281        .long 2b, 5b
 282        .long 3b, 6b
 283.popsection
 284        POP_GS_EX
 285.endm
 286
 287.macro RING0_INT_FRAME
 288        CFI_STARTPROC simple
 289        CFI_SIGNAL_FRAME
 290        CFI_DEF_CFA esp, 3*4
 291        /*CFI_OFFSET cs, -2*4;*/
 292        CFI_OFFSET eip, -3*4
 293.endm
 294
 295.macro RING0_EC_FRAME
 296        CFI_STARTPROC simple
 297        CFI_SIGNAL_FRAME
 298        CFI_DEF_CFA esp, 4*4
 299        /*CFI_OFFSET cs, -2*4;*/
 300        CFI_OFFSET eip, -3*4
 301.endm
 302
 303.macro RING0_PTREGS_FRAME
 304        CFI_STARTPROC simple
 305        CFI_SIGNAL_FRAME
 306        CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
 307        /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
 308        CFI_OFFSET eip, PT_EIP-PT_OLDESP
 309        /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
 310        /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
 311        CFI_OFFSET eax, PT_EAX-PT_OLDESP
 312        CFI_OFFSET ebp, PT_EBP-PT_OLDESP
 313        CFI_OFFSET edi, PT_EDI-PT_OLDESP
 314        CFI_OFFSET esi, PT_ESI-PT_OLDESP
 315        CFI_OFFSET edx, PT_EDX-PT_OLDESP
 316        CFI_OFFSET ecx, PT_ECX-PT_OLDESP
 317        CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 318.endm
 319
 320ENTRY(ret_from_fork)
 321        CFI_STARTPROC
 322        pushl %eax
 323        CFI_ADJUST_CFA_OFFSET 4
 324        call schedule_tail
 325        GET_THREAD_INFO(%ebp)
 326        popl %eax
 327        CFI_ADJUST_CFA_OFFSET -4
 328        pushl $0x0202                   # Reset kernel eflags
 329        CFI_ADJUST_CFA_OFFSET 4
 330        popfl
 331        CFI_ADJUST_CFA_OFFSET -4
 332        jmp syscall_exit
 333        CFI_ENDPROC
 334END(ret_from_fork)
 335
 336/*
 337 * Interrupt exit functions should be protected against kprobes
 338 */
 339        .pushsection .kprobes.text, "ax"
 340/*
 341 * Return to user mode is not as complex as all this looks,
 342 * but we want the default path for a system call return to
  343 * go as quickly as possible, which is why some of this is
 344 * less clear than it otherwise should be.
 345 */
 346
 347        # userspace resumption stub bypassing syscall exit tracing
 348        ALIGN
 349        RING0_PTREGS_FRAME
 350ret_from_exception:
 351        preempt_stop(CLBR_ANY)
 352ret_from_intr:
 353        GET_THREAD_INFO(%ebp)
 354check_userspace:
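        /*
         * The next few instructions merge the saved CS into the low byte
         * of the saved EFLAGS copy, so a single mask-and-compare can test
         * both "were we in vm86 mode?" (X86_EFLAGS_VM) and "was the RPL a
         * user RPL?" (SEGMENT_RPL_MASK) at once.
         */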
 355        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS and CS
 356        movb PT_CS(%esp), %al
 357        andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 358        cmpl $USER_RPL, %eax
 359        jb resume_kernel                # not returning to v8086 or userspace
 360
 361ENTRY(resume_userspace)
 362        LOCKDEP_SYS_EXIT
 363        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 364                                        # setting need_resched or sigpending
 365                                        # between sampling and the iret
 366        TRACE_IRQS_OFF
 367        movl TI_flags(%ebp), %ecx
 368        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done on
 369                                        # int/exception return?
 370        jne work_pending
 371        jmp restore_all
 372END(ret_from_exception)
 373
 374#ifdef CONFIG_PREEMPT
 375ENTRY(resume_kernel)
 376        DISABLE_INTERRUPTS(CLBR_ANY)
 377        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
 378        jnz restore_all
 379need_resched:
 380        movl TI_flags(%ebp), %ecx       # need_resched set ?
 381        testb $_TIF_NEED_RESCHED, %cl
 382        jz restore_all
 383        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
 384        jz restore_all
 385        call preempt_schedule_irq
 386        jmp need_resched
 387END(resume_kernel)
 388#endif
 389        CFI_ENDPROC
 390/*
 391 * End of kprobes section
 392 */
 393        .popsection
 394
  395/* SYSENTER_RETURN points just past the "sysenter" instruction in
  396   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 397
 398        # sysenter call handler stub
 399ENTRY(ia32_sysenter_target)
 400        CFI_STARTPROC simple
 401        CFI_SIGNAL_FRAME
 402        CFI_DEF_CFA esp, 0
 403        CFI_REGISTER esp, ebp
 404        movl TSS_sysenter_sp0(%esp),%esp
 405sysenter_past_esp:
 406        /*
  407         * Interrupts are disabled here, but we can't trace that until
  408         * enough kernel state has been set up for TRACE_IRQS_OFF to be
  409         * called - and we immediately enable interrupts at that point anyway.
 410         */
 411        pushl $(__USER_DS)
 412        CFI_ADJUST_CFA_OFFSET 4
 413        /*CFI_REL_OFFSET ss, 0*/
 414        pushl %ebp
 415        CFI_ADJUST_CFA_OFFSET 4
 416        CFI_REL_OFFSET esp, 0
 417        pushfl
 418        orl $X86_EFLAGS_IF, (%esp)
 419        CFI_ADJUST_CFA_OFFSET 4
 420        pushl $(__USER_CS)
 421        CFI_ADJUST_CFA_OFFSET 4
 422        /*CFI_REL_OFFSET cs, 0*/
 423        /*
 424         * Push current_thread_info()->sysenter_return to the stack.
 425         * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 426         * pushed above; +8 corresponds to copy_thread's esp0 setting.
 427         */
 428        pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
 429        CFI_ADJUST_CFA_OFFSET 4
 430        CFI_REL_OFFSET eip, 0
 431
 432        pushl %eax
 433        CFI_ADJUST_CFA_OFFSET 4
 434        SAVE_ALL
 435        ENABLE_INTERRUPTS(CLBR_NONE)
 436
 437/*
 438 * Load the potential sixth argument from user stack.
 439 * Careful about security.
 440 */
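/*
 * The compare below allows %ebp only up to __PAGE_OFFSET-4, so the
 * 4-byte load at (%ebp) can never reach into kernel addresses; anything
 * at or above __PAGE_OFFSET-3 takes the syscall_fault path instead.
 */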
 441        cmpl $__PAGE_OFFSET-3,%ebp
 442        jae syscall_fault
 4431:      movl (%ebp),%ebp
 444        movl %ebp,PT_EBP(%esp)
 445.section __ex_table,"a"
 446        .align 4
 447        .long 1b,syscall_fault
 448.previous
 449
 450        GET_THREAD_INFO(%ebp)
 451
 452        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 453        jnz sysenter_audit
 454sysenter_do_call:
 455        cmpl $(nr_syscalls), %eax
 456        jae syscall_badsys
 457        call *sys_call_table(,%eax,4)
 458        movl %eax,PT_EAX(%esp)
 459        LOCKDEP_SYS_EXIT
 460        DISABLE_INTERRUPTS(CLBR_ANY)
 461        TRACE_IRQS_OFF
 462        movl TI_flags(%ebp), %ecx
 463        testl $_TIF_ALLWORK_MASK, %ecx
 464        jne sysexit_audit
 465sysenter_exit:
 466/* if something modifies registers it must also disable sysexit */
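        /*
         * sysexit resumes userspace at the address in %edx with the user
         * stack taken from %ecx, which is why both are reloaded from
         * pt_regs here.
         */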
 467        movl PT_EIP(%esp), %edx
 468        movl PT_OLDESP(%esp), %ecx
 469        xorl %ebp,%ebp
 470        TRACE_IRQS_ON
 4711:      mov  PT_FS(%esp), %fs
 472        PTGS_TO_GS
 473        ENABLE_INTERRUPTS_SYSEXIT
 474
 475#ifdef CONFIG_AUDITSYSCALL
 476sysenter_audit:
 477        testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 478        jnz syscall_trace_entry
 479        addl $4,%esp
 480        CFI_ADJUST_CFA_OFFSET -4
 481        /* %esi already in 8(%esp)         6th arg: 4th syscall arg */
 482        /* %edx already in 4(%esp)         5th arg: 3rd syscall arg */
 483        /* %ecx already in 0(%esp)         4th arg: 2nd syscall arg */
 484        movl %ebx,%ecx                  /* 3rd arg: 1st syscall arg */
 485        movl %eax,%edx                  /* 2nd arg: syscall number */
 486        movl $AUDIT_ARCH_I386,%eax      /* 1st arg: audit arch */
 487        call audit_syscall_entry
 488        pushl %ebx
 489        CFI_ADJUST_CFA_OFFSET 4
 490        movl PT_EAX(%esp),%eax          /* reload syscall number */
 491        jmp sysenter_do_call
 492
 493sysexit_audit:
 494        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 495        jne syscall_exit_work
 496        TRACE_IRQS_ON
 497        ENABLE_INTERRUPTS(CLBR_ANY)
 498        movl %eax,%edx          /* second arg, syscall return value */
 499        cmpl $0,%eax            /* is it < 0? */
 500        setl %al                /* 1 if so, 0 if not */
 501        movzbl %al,%eax         /* zero-extend that */
 502        inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 503        call audit_syscall_exit
 504        DISABLE_INTERRUPTS(CLBR_ANY)
 505        TRACE_IRQS_OFF
 506        movl TI_flags(%ebp), %ecx
 507        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 508        jne syscall_exit_work
 509        movl PT_EAX(%esp),%eax  /* reload syscall return value */
 510        jmp sysenter_exit
 511#endif
 512
 513        CFI_ENDPROC
 514.pushsection .fixup,"ax"
 5152:      movl $0,PT_FS(%esp)
 516        jmp 1b
 517.section __ex_table,"a"
 518        .align 4
 519        .long 1b,2b
 520.popsection
 521        PTGS_TO_GS_EX
 522ENDPROC(ia32_sysenter_target)
 523
 524/*
 525 * syscall stub including irq exit should be protected against kprobes
 526 */
 527        .pushsection .kprobes.text, "ax"
 528        # system call handler stub
 529ENTRY(system_call)
 530        RING0_INT_FRAME                 # can't unwind into user space anyway
 531        pushl %eax                      # save orig_eax
 532        CFI_ADJUST_CFA_OFFSET 4
 533        SAVE_ALL
 534        GET_THREAD_INFO(%ebp)
 535                                        # system call tracing in operation / emulation
 536        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 537        jnz syscall_trace_entry
 538        cmpl $(nr_syscalls), %eax
 539        jae syscall_badsys
 540syscall_call:
 541        call *sys_call_table(,%eax,4)
 542        movl %eax,PT_EAX(%esp)          # store the return value
 543syscall_exit:
 544        LOCKDEP_SYS_EXIT
 545        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 546                                        # setting need_resched or sigpending
 547                                        # between sampling and the iret
 548        TRACE_IRQS_OFF
 549        movl TI_flags(%ebp), %ecx
 550        testl $_TIF_ALLWORK_MASK, %ecx  # current->work
 551        jne syscall_exit_work
 552
 553restore_all:
 554        TRACE_IRQS_IRET
 555restore_all_notrace:
 556        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS, SS and CS
 557        # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 558        # are returning to the kernel.
 559        # See comments in process.c:copy_thread() for details.
 560        movb PT_OLDSS(%esp), %ah
 561        movb PT_CS(%esp), %al
 562        andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 563        cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 564        CFI_REMEMBER_STATE
 565        je ldt_ss                       # returning to user-space with LDT SS
 566restore_nocheck:
 567        RESTORE_REGS 4                  # skip orig_eax/error_code
 568        CFI_ADJUST_CFA_OFFSET -4
 569irq_return:
 570        INTERRUPT_RETURN
 571.section .fixup,"ax"
 572ENTRY(iret_exc)
 573        pushl $0                        # no error code
 574        pushl $do_iret_error
 575        jmp error_code
 576.previous
 577.section __ex_table,"a"
 578        .align 4
 579        .long irq_return,iret_exc
 580.previous
 581
 582        CFI_RESTORE_STATE
 583ldt_ss:
 584        larl PT_OLDSS(%esp), %eax
 585        jnz restore_nocheck
 586        testl $0x00400000, %eax         # returning to 32bit stack?
  587        jnz restore_nocheck             # all right, normal return
 588
 589#ifdef CONFIG_PARAVIRT
 590        /*
 591         * The kernel can't run on a non-flat stack if paravirt mode
 592         * is active.  Rather than try to fixup the high bits of
 593         * ESP, bypass this code entirely.  This may break DOSemu
 594         * and/or Wine support in a paravirt VM, although the option
 595         * is still available to implement the setting of the high
 596         * 16-bits in the INTERRUPT_RETURN paravirt-op.
 597         */
 598        cmpl $0, pv_info+PARAVIRT_enabled
 599        jne restore_nocheck
 600#endif
 601
 602/*
 603 * Setup and switch to ESPFIX stack
 604 *
 605 * We're returning to userspace with a 16 bit stack. The CPU will not
 606 * restore the high word of ESP for us on executing iret... This is an
 607 * "official" bug of all the x86-compatible CPUs, which we can work
 608 * around to make dosemu and wine happy. We do this by preloading the
 609 * high word of ESP with the high word of the userspace ESP while
  610 * compensating for the offset by switching to the ESPFIX segment, whose
  611 * base address makes up the difference.
 612 */
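/*
 * Sketch of the arithmetic below, with K = current kernel %esp and
 * U = the saved userspace %esp:
 *
 *      new_esp = (U & 0xffff0000) | (K & 0xffff)
 *      base    = K - new_esp                     (low word is zero)
 *
 * "base" is patched into the __ESPFIX_SS descriptor, so base + new_esp
 * still addresses the same kernel stack, while %esp itself now carries
 * the user's high word for the benefit of iret.
 */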
 613        mov %esp, %edx                  /* load kernel esp */
 614        mov PT_OLDESP(%esp), %eax       /* load userspace esp */
 615        mov %dx, %ax                    /* eax: new kernel esp */
 616        sub %eax, %edx                  /* offset (low word is 0) */
 617        PER_CPU(gdt_page, %ebx)
 618        shr $16, %edx
 619        mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
 620        mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
 621        pushl $__ESPFIX_SS
 622        CFI_ADJUST_CFA_OFFSET 4
 623        push %eax                       /* new kernel esp */
 624        CFI_ADJUST_CFA_OFFSET 4
 625        /* Disable interrupts, but do not irqtrace this section: we
 626         * will soon execute iret and the tracer was already set to
 627         * the irqstate after the iret */
 628        DISABLE_INTERRUPTS(CLBR_EAX)
 629        lss (%esp), %esp                /* switch to espfix segment */
 630        CFI_ADJUST_CFA_OFFSET -8
 631        jmp restore_nocheck
 632        CFI_ENDPROC
 633ENDPROC(system_call)
 634
 635        # perform work that needs to be done immediately before resumption
 636        ALIGN
 637        RING0_PTREGS_FRAME              # can't unwind into user space anyway
 638work_pending:
 639        testb $_TIF_NEED_RESCHED, %cl
 640        jz work_notifysig
 641work_resched:
 642        call schedule
 643        LOCKDEP_SYS_EXIT
 644        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 645                                        # setting need_resched or sigpending
 646                                        # between sampling and the iret
 647        TRACE_IRQS_OFF
 648        movl TI_flags(%ebp), %ecx
 649        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done other
 650                                        # than syscall tracing?
 651        jz restore_all
 652        testb $_TIF_NEED_RESCHED, %cl
 653        jnz work_resched
 654
 655work_notifysig:                         # deal with pending signals and
 656                                        # notify-resume requests
 657#ifdef CONFIG_VM86
 658        testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 659        movl %esp, %eax
 660        jne work_notifysig_v86          # returning to kernel-space or
 661                                        # vm86-space
 662        xorl %edx, %edx
 663        call do_notify_resume
 664        jmp resume_userspace_sig
 665
 666        ALIGN
 667work_notifysig_v86:
 668        pushl %ecx                      # save ti_flags for do_notify_resume
 669        CFI_ADJUST_CFA_OFFSET 4
 670        call save_v86_state             # %eax contains pt_regs pointer
 671        popl %ecx
 672        CFI_ADJUST_CFA_OFFSET -4
 673        movl %eax, %esp
 674#else
 675        movl %esp, %eax
 676#endif
 677        xorl %edx, %edx
 678        call do_notify_resume
 679        jmp resume_userspace_sig
 680END(work_pending)
 681
  682        # perform syscall entry tracing
 683        ALIGN
 684syscall_trace_entry:
 685        movl $-ENOSYS,PT_EAX(%esp)
 686        movl %esp, %eax
 687        call syscall_trace_enter
 688        /* What it returned is what we'll actually use.  */
 689        cmpl $(nr_syscalls), %eax
 690        jnae syscall_call
 691        jmp syscall_exit
 692END(syscall_trace_entry)
 693
 694        # perform syscall exit tracing
 695        ALIGN
 696syscall_exit_work:
 697        testl $_TIF_WORK_SYSCALL_EXIT, %ecx
 698        jz work_pending
 699        TRACE_IRQS_ON
 700        ENABLE_INTERRUPTS(CLBR_ANY)     # could let syscall_trace_leave() call
 701                                        # schedule() instead
 702        movl %esp, %eax
 703        call syscall_trace_leave
 704        jmp resume_userspace
 705END(syscall_exit_work)
 706        CFI_ENDPROC
 707
 708        RING0_INT_FRAME                 # can't unwind into user space anyway
 709syscall_fault:
 710        GET_THREAD_INFO(%ebp)
 711        movl $-EFAULT,PT_EAX(%esp)
 712        jmp resume_userspace
 713END(syscall_fault)
 714
 715syscall_badsys:
 716        movl $-ENOSYS,PT_EAX(%esp)
 717        jmp resume_userspace
 718END(syscall_badsys)
 719        CFI_ENDPROC
 720/*
 721 * End of kprobes section
 722 */
 723        .popsection
 724
 725/*
 726 * System calls that need a pt_regs pointer.
 727 */
 728#define PTREGSCALL0(name) \
 729        ALIGN; \
 730ptregs_##name: \
 731        leal 4(%esp),%eax; \
 732        jmp sys_##name;
 733
 734#define PTREGSCALL1(name) \
 735        ALIGN; \
 736ptregs_##name: \
 737        leal 4(%esp),%edx; \
 738        movl (PT_EBX+4)(%esp),%eax; \
 739        jmp sys_##name;
 740
 741#define PTREGSCALL2(name) \
 742        ALIGN; \
 743ptregs_##name: \
 744        leal 4(%esp),%ecx; \
 745        movl (PT_ECX+4)(%esp),%edx; \
 746        movl (PT_EBX+4)(%esp),%eax; \
 747        jmp sys_##name;
 748
 749#define PTREGSCALL3(name) \
 750        ALIGN; \
 751ptregs_##name: \
 752        leal 4(%esp),%eax; \
 753        pushl %eax; \
 754        movl PT_EDX(%eax),%ecx; \
 755        movl PT_ECX(%eax),%edx; \
 756        movl PT_EBX(%eax),%eax; \
 757        call sys_##name; \
 758        addl $4,%esp; \
 759        ret
 760
 761PTREGSCALL1(iopl)
 762PTREGSCALL0(fork)
 763PTREGSCALL0(vfork)
 764PTREGSCALL3(execve)
 765PTREGSCALL2(sigaltstack)
 766PTREGSCALL0(sigreturn)
 767PTREGSCALL0(rt_sigreturn)
 768PTREGSCALL2(vm86)
 769PTREGSCALL1(vm86old)
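/*
 * For illustration, PTREGSCALL1(iopl) above expands (ignoring ALIGN) to
 * a small stub that hands the syscall both its first argument and a
 * pt_regs pointer:
 *
 *      ptregs_iopl:
 *              leal 4(%esp),%edx               # pt_regs pointer (skip the
 *                                              # return address on the stack)
 *              movl (PT_EBX+4)(%esp),%eax      # original first argument
 *              jmp sys_iopl
 */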
 770
 771/* Clone is an oddball.  The 4th arg is in %edi */
 772        ALIGN;
 773ptregs_clone:
 774        leal 4(%esp),%eax
 775        pushl %eax
 776        pushl PT_EDI(%eax)
 777        movl PT_EDX(%eax),%ecx
 778        movl PT_ECX(%eax),%edx
 779        movl PT_EBX(%eax),%eax
 780        call sys_clone
 781        addl $8,%esp
 782        ret
 783
 784.macro FIXUP_ESPFIX_STACK
 785/*
  786 * Switch back from the ESPFIX stack to the normal zero-based stack
  787 *
  788 * We can't call C functions using the ESPFIX stack. This code reads
  789 * the high word of the segment base from the GDT, switches to the
  790 * normal stack and adjusts ESP with the matching offset.
 791 */
 792        /* fixup the stack */
 793        PER_CPU(gdt_page, %ebx)
 794        mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
 795        mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
 796        shl $16, %eax
 797        addl %esp, %eax                 /* the adjusted stack pointer */
 798        pushl $__KERNEL_DS
 799        CFI_ADJUST_CFA_OFFSET 4
 800        pushl %eax
 801        CFI_ADJUST_CFA_OFFSET 4
 802        lss (%esp), %esp                /* switch to the normal stack segment */
 803        CFI_ADJUST_CFA_OFFSET -8
 804.endm
 805.macro UNWIND_ESPFIX_STACK
 806        movl %ss, %eax
 807        /* see if on espfix stack */
 808        cmpw $__ESPFIX_SS, %ax
 809        jne 27f
 810        movl $__KERNEL_DS, %eax
 811        movl %eax, %ds
 812        movl %eax, %es
 813        /* switch to normal stack */
 814        FIXUP_ESPFIX_STACK
 81527:
 816.endm
 817
 818/*
 819 * Build the entry stubs and pointer table with some assembler magic.
 820 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 821 * single cache line on all modern x86 implementations.
 822 */
 823.section .init.rodata,"a"
 824ENTRY(interrupt)
 825.text
 826        .p2align 5
 827        .p2align CONFIG_X86_L1_CACHE_SHIFT
 828ENTRY(irq_entries_start)
 829        RING0_INT_FRAME
 830vector=FIRST_EXTERNAL_VECTOR
 831.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
 832        .balign 32
 833  .rept 7
 834    .if vector < NR_VECTORS
 835      .if vector <> FIRST_EXTERNAL_VECTOR
 836        CFI_ADJUST_CFA_OFFSET -4
 837      .endif
 8381:      pushl $(~vector+0x80)   /* Note: always in signed byte range */
 839        CFI_ADJUST_CFA_OFFSET 4
 840      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 841        jmp 2f
 842      .endif
 843      .previous
 844        .long 1b
 845      .text
 846vector=vector+1
 847    .endif
 848  .endr
 8492:      jmp common_interrupt
 850.endr
 851END(irq_entries_start)
 852
 853.previous
 854END(interrupt)
 855.previous
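/*
 * Sketch of one generated stub, e.g. for vector 0x31: it pushes the
 * vector encoded as (~vector + 0x80), which always fits in a signed
 * byte, then funnels (directly or via the short "jmp 2f" hop) into
 * common_interrupt:
 *
 *      pushl $(~0x31+0x80)             # i.e. $0x4e, a 2-byte push
 *      jmp   common_interrupt
 *
 * common_interrupt's "addl $-0x80,(%esp)" below undoes the +0x80 bias.
 */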
 856
 857/*
 858 * the CPU automatically disables interrupts when executing an IRQ vector,
 859 * so IRQ-flags tracing has to follow that:
 860 */
 861        .p2align CONFIG_X86_L1_CACHE_SHIFT
 862common_interrupt:
 863        addl $-0x80,(%esp)      /* Adjust vector into the [-256,-1] range */
 864        SAVE_ALL
 865        TRACE_IRQS_OFF
 866        movl %esp,%eax
 867        call do_IRQ
 868        jmp ret_from_intr
 869ENDPROC(common_interrupt)
 870        CFI_ENDPROC
 871
 872/*
 873 *  Irq entries should be protected against kprobes
 874 */
 875        .pushsection .kprobes.text, "ax"
 876#define BUILD_INTERRUPT3(name, nr, fn)  \
 877ENTRY(name)                             \
 878        RING0_INT_FRAME;                \
 879        pushl $~(nr);                   \
 880        CFI_ADJUST_CFA_OFFSET 4;        \
 881        SAVE_ALL;                       \
 882        TRACE_IRQS_OFF                  \
 883        movl %esp,%eax;                 \
 884        call fn;                        \
 885        jmp ret_from_intr;              \
 886        CFI_ENDPROC;                    \
 887ENDPROC(name)
 888
 889#define BUILD_INTERRUPT(name, nr)       BUILD_INTERRUPT3(name, nr, smp_##name)
 890
 891/* The include is where all of the SMP etc. interrupts come from */
 892#include <asm/entry_arch.h>
 893
 894ENTRY(coprocessor_error)
 895        RING0_INT_FRAME
 896        pushl $0
 897        CFI_ADJUST_CFA_OFFSET 4
 898        pushl $do_coprocessor_error
 899        CFI_ADJUST_CFA_OFFSET 4
 900        jmp error_code
 901        CFI_ENDPROC
 902END(coprocessor_error)
 903
 904ENTRY(simd_coprocessor_error)
 905        RING0_INT_FRAME
 906        pushl $0
 907        CFI_ADJUST_CFA_OFFSET 4
 908        pushl $do_simd_coprocessor_error
 909        CFI_ADJUST_CFA_OFFSET 4
 910        jmp error_code
 911        CFI_ENDPROC
 912END(simd_coprocessor_error)
 913
 914ENTRY(device_not_available)
 915        RING0_INT_FRAME
 916        pushl $-1                       # mark this as an int
 917        CFI_ADJUST_CFA_OFFSET 4
 918        pushl $do_device_not_available
 919        CFI_ADJUST_CFA_OFFSET 4
 920        jmp error_code
 921        CFI_ENDPROC
 922END(device_not_available)
 923
 924#ifdef CONFIG_PARAVIRT
 925ENTRY(native_iret)
 926        iret
 927.section __ex_table,"a"
 928        .align 4
 929        .long native_iret, iret_exc
 930.previous
 931END(native_iret)
 932
 933ENTRY(native_irq_enable_sysexit)
 934        sti
 935        sysexit
 936END(native_irq_enable_sysexit)
 937#endif
 938
 939ENTRY(overflow)
 940        RING0_INT_FRAME
 941        pushl $0
 942        CFI_ADJUST_CFA_OFFSET 4
 943        pushl $do_overflow
 944        CFI_ADJUST_CFA_OFFSET 4
 945        jmp error_code
 946        CFI_ENDPROC
 947END(overflow)
 948
 949ENTRY(bounds)
 950        RING0_INT_FRAME
 951        pushl $0
 952        CFI_ADJUST_CFA_OFFSET 4
 953        pushl $do_bounds
 954        CFI_ADJUST_CFA_OFFSET 4
 955        jmp error_code
 956        CFI_ENDPROC
 957END(bounds)
 958
 959ENTRY(invalid_op)
 960        RING0_INT_FRAME
 961        pushl $0
 962        CFI_ADJUST_CFA_OFFSET 4
 963        pushl $do_invalid_op
 964        CFI_ADJUST_CFA_OFFSET 4
 965        jmp error_code
 966        CFI_ENDPROC
 967END(invalid_op)
 968
 969ENTRY(coprocessor_segment_overrun)
 970        RING0_INT_FRAME
 971        pushl $0
 972        CFI_ADJUST_CFA_OFFSET 4
 973        pushl $do_coprocessor_segment_overrun
 974        CFI_ADJUST_CFA_OFFSET 4
 975        jmp error_code
 976        CFI_ENDPROC
 977END(coprocessor_segment_overrun)
 978
 979ENTRY(invalid_TSS)
 980        RING0_EC_FRAME
 981        pushl $do_invalid_TSS
 982        CFI_ADJUST_CFA_OFFSET 4
 983        jmp error_code
 984        CFI_ENDPROC
 985END(invalid_TSS)
 986
 987ENTRY(segment_not_present)
 988        RING0_EC_FRAME
 989        pushl $do_segment_not_present
 990        CFI_ADJUST_CFA_OFFSET 4
 991        jmp error_code
 992        CFI_ENDPROC
 993END(segment_not_present)
 994
 995ENTRY(stack_segment)
 996        RING0_EC_FRAME
 997        pushl $do_stack_segment
 998        CFI_ADJUST_CFA_OFFSET 4
 999        jmp error_code
1000        CFI_ENDPROC
1001END(stack_segment)
1002
1003ENTRY(alignment_check)
1004        RING0_EC_FRAME
1005        pushl $do_alignment_check
1006        CFI_ADJUST_CFA_OFFSET 4
1007        jmp error_code
1008        CFI_ENDPROC
1009END(alignment_check)
1010
1011ENTRY(divide_error)
1012        RING0_INT_FRAME
1013        pushl $0                        # no error code
1014        CFI_ADJUST_CFA_OFFSET 4
1015        pushl $do_divide_error
1016        CFI_ADJUST_CFA_OFFSET 4
1017        jmp error_code
1018        CFI_ENDPROC
1019END(divide_error)
1020
1021#ifdef CONFIG_X86_MCE
1022ENTRY(machine_check)
1023        RING0_INT_FRAME
1024        pushl $0
1025        CFI_ADJUST_CFA_OFFSET 4
1026        pushl machine_check_vector
1027        CFI_ADJUST_CFA_OFFSET 4
1028        jmp error_code
1029        CFI_ENDPROC
1030END(machine_check)
1031#endif
1032
1033ENTRY(spurious_interrupt_bug)
1034        RING0_INT_FRAME
1035        pushl $0
1036        CFI_ADJUST_CFA_OFFSET 4
1037        pushl $do_spurious_interrupt_bug
1038        CFI_ADJUST_CFA_OFFSET 4
1039        jmp error_code
1040        CFI_ENDPROC
1041END(spurious_interrupt_bug)
1042/*
1043 * End of kprobes section
1044 */
1045        .popsection
1046
1047ENTRY(kernel_thread_helper)
1048        pushl $0                # fake return address for unwinder
1049        CFI_STARTPROC
1050        movl %edi,%eax
1051        call *%esi
1052        call do_exit
1053        ud2                     # padding for call trace
1054        CFI_ENDPROC
1055ENDPROC(kernel_thread_helper)
1056
1057#ifdef CONFIG_XEN
1058/* Xen doesn't set %esp to be precisely what the normal sysenter
1059   entrypoint expects, so fix it up before using the normal path. */
1060ENTRY(xen_sysenter_target)
1061        RING0_INT_FRAME
1062        addl $5*4, %esp         /* remove xen-provided frame */
1063        CFI_ADJUST_CFA_OFFSET -5*4
1064        jmp sysenter_past_esp
1065        CFI_ENDPROC
1066
1067ENTRY(xen_hypervisor_callback)
1068        CFI_STARTPROC
1069        pushl $0
1070        CFI_ADJUST_CFA_OFFSET 4
1071        SAVE_ALL
1072        TRACE_IRQS_OFF
1073
1074        /* Check to see if we got the event in the critical
1075           region in xen_iret_direct, after we've reenabled
 1076           events and checked for pending events.  This simulates the
 1077           iret instruction's behaviour of delivering a
1078           pending interrupt when enabling interrupts. */
1079        movl PT_EIP(%esp),%eax
1080        cmpl $xen_iret_start_crit,%eax
1081        jb   1f
1082        cmpl $xen_iret_end_crit,%eax
1083        jae  1f
1084
1085        jmp  xen_iret_crit_fixup
1086
1087ENTRY(xen_do_upcall)
10881:      mov %esp, %eax
1089        call xen_evtchn_do_upcall
1090        jmp  ret_from_intr
1091        CFI_ENDPROC
1092ENDPROC(xen_hypervisor_callback)
1093
1094# Hypervisor uses this for application faults while it executes.
1095# We get here for two reasons:
1096#  1. Fault while reloading DS, ES, FS or GS
1097#  2. Fault while executing IRET
1098# Category 1 we fix up by reattempting the load, and zeroing the segment
1099# register if the load fails.
1100# Category 2 we fix up by jumping to do_iret_error. We cannot use the
1101# normal Linux return path in this case because if we use the IRET hypercall
1102# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1103# We distinguish between categories by maintaining a status value in EAX.
1104ENTRY(xen_failsafe_callback)
1105        CFI_STARTPROC
1106        pushl %eax
1107        CFI_ADJUST_CFA_OFFSET 4
1108        movl $1,%eax
11091:      mov 4(%esp),%ds
11102:      mov 8(%esp),%es
11113:      mov 12(%esp),%fs
11124:      mov 16(%esp),%gs
1113        testl %eax,%eax
1114        popl %eax
1115        CFI_ADJUST_CFA_OFFSET -4
1116        lea 16(%esp),%esp
1117        CFI_ADJUST_CFA_OFFSET -16
1118        jz 5f
1119        addl $16,%esp
1120        jmp iret_exc            # EAX != 0 => Category 2 (Bad IRET)
11215:      pushl $0                # EAX == 0 => Category 1 (Bad segment)
1122        CFI_ADJUST_CFA_OFFSET 4
1123        SAVE_ALL
1124        jmp ret_from_exception
1125        CFI_ENDPROC
1126
1127.section .fixup,"ax"
11286:      xorl %eax,%eax
1129        movl %eax,4(%esp)
1130        jmp 1b
11317:      xorl %eax,%eax
1132        movl %eax,8(%esp)
1133        jmp 2b
11348:      xorl %eax,%eax
1135        movl %eax,12(%esp)
1136        jmp 3b
11379:      xorl %eax,%eax
1138        movl %eax,16(%esp)
1139        jmp 4b
1140.previous
1141.section __ex_table,"a"
1142        .align 4
1143        .long 1b,6b
1144        .long 2b,7b
1145        .long 3b,8b
1146        .long 4b,9b
1147.previous
1148ENDPROC(xen_failsafe_callback)
1149
1150#endif  /* CONFIG_XEN */
1151
1152#ifdef CONFIG_FUNCTION_TRACER
1153#ifdef CONFIG_DYNAMIC_FTRACE
1154
1155ENTRY(mcount)
1156        ret
1157END(mcount)
1158
1159ENTRY(ftrace_caller)
1160        cmpl $0, function_trace_stop
1161        jne  ftrace_stub
1162
1163        pushl %eax
1164        pushl %ecx
1165        pushl %edx
1166        movl 0xc(%esp), %eax
1167        movl 0x4(%ebp), %edx
1168        subl $MCOUNT_INSN_SIZE, %eax
1169
1170.globl ftrace_call
1171ftrace_call:
1172        call ftrace_stub
1173
1174        popl %edx
1175        popl %ecx
1176        popl %eax
1177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1178.globl ftrace_graph_call
1179ftrace_graph_call:
1180        jmp ftrace_stub
1181#endif
1182
1183.globl ftrace_stub
1184ftrace_stub:
1185        ret
1186END(ftrace_caller)
1187
1188#else /* ! CONFIG_DYNAMIC_FTRACE */
1189
1190ENTRY(mcount)
1191        cmpl $0, function_trace_stop
1192        jne  ftrace_stub
1193
1194        cmpl $ftrace_stub, ftrace_trace_function
1195        jnz trace
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197        cmpl $ftrace_stub, ftrace_graph_return
1198        jnz ftrace_graph_caller
1199
1200        cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1201        jnz ftrace_graph_caller
1202#endif
1203.globl ftrace_stub
1204ftrace_stub:
1205        ret
1206
1207        /* taken from glibc */
1208trace:
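        /*
         * At the movl below, 0xc(%esp) is mcount's own return address
         * (three pushes deep), i.e. the address just past the mcount call
         * inside the instrumented function; subtracting MCOUNT_INSN_SIZE
         * points %eax back at that call site, which is what gets traced.
         * 0x4(%ebp) is the instrumented function's return address - its
         * caller - available because gcc sets up the frame pointer before
         * emitting the mcount call.
         */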
1209        pushl %eax
1210        pushl %ecx
1211        pushl %edx
1212        movl 0xc(%esp), %eax
1213        movl 0x4(%ebp), %edx
1214        subl $MCOUNT_INSN_SIZE, %eax
1215
1216        call *ftrace_trace_function
1217
1218        popl %edx
1219        popl %ecx
1220        popl %eax
1221        jmp ftrace_stub
1222END(mcount)
1223#endif /* CONFIG_DYNAMIC_FTRACE */
1224#endif /* CONFIG_FUNCTION_TRACER */
1225
1226#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1227ENTRY(ftrace_graph_caller)
1228        cmpl $0, function_trace_stop
1229        jne ftrace_stub
1230
1231        pushl %eax
1232        pushl %ecx
1233        pushl %edx
1234        movl 0xc(%esp), %edx
1235        lea 0x4(%ebp), %eax
1236        movl (%ebp), %ecx
1237        subl $MCOUNT_INSN_SIZE, %edx
1238        call prepare_ftrace_return
1239        popl %edx
1240        popl %ecx
1241        popl %eax
1242        ret
1243END(ftrace_graph_caller)
1244
1245.globl return_to_handler
1246return_to_handler:
1247        pushl %eax
1248        pushl %edx
1249        movl %ebp, %eax
1250        call ftrace_return_to_handler
1251        movl %eax, %ecx
1252        popl %edx
1253        popl %eax
1254        jmp *%ecx
1255#endif
1256
1257.section .rodata,"a"
1258#include "syscall_table_32.S"
1259
1260syscall_table_size=(.-sys_call_table)
1261
1262/*
1263 * Some functions should be protected against kprobes
1264 */
1265        .pushsection .kprobes.text, "ax"
1266
1267ENTRY(page_fault)
1268        RING0_EC_FRAME
1269        pushl $do_page_fault
1270        CFI_ADJUST_CFA_OFFSET 4
1271        ALIGN
1272error_code:
1273        /* the function address is in %gs's slot on the stack */
1274        pushl %fs
1275        CFI_ADJUST_CFA_OFFSET 4
1276        /*CFI_REL_OFFSET fs, 0*/
1277        pushl %es
1278        CFI_ADJUST_CFA_OFFSET 4
1279        /*CFI_REL_OFFSET es, 0*/
1280        pushl %ds
1281        CFI_ADJUST_CFA_OFFSET 4
1282        /*CFI_REL_OFFSET ds, 0*/
1283        pushl %eax
1284        CFI_ADJUST_CFA_OFFSET 4
1285        CFI_REL_OFFSET eax, 0
1286        pushl %ebp
1287        CFI_ADJUST_CFA_OFFSET 4
1288        CFI_REL_OFFSET ebp, 0
1289        pushl %edi
1290        CFI_ADJUST_CFA_OFFSET 4
1291        CFI_REL_OFFSET edi, 0
1292        pushl %esi
1293        CFI_ADJUST_CFA_OFFSET 4
1294        CFI_REL_OFFSET esi, 0
1295        pushl %edx
1296        CFI_ADJUST_CFA_OFFSET 4
1297        CFI_REL_OFFSET edx, 0
1298        pushl %ecx
1299        CFI_ADJUST_CFA_OFFSET 4
1300        CFI_REL_OFFSET ecx, 0
1301        pushl %ebx
1302        CFI_ADJUST_CFA_OFFSET 4
1303        CFI_REL_OFFSET ebx, 0
1304        cld
1305        movl $(__KERNEL_PERCPU), %ecx
1306        movl %ecx, %fs
1307        UNWIND_ESPFIX_STACK
1308        GS_TO_REG %ecx
1309        movl PT_GS(%esp), %edi          # get the function address
1310        movl PT_ORIG_EAX(%esp), %edx    # get the error code
1311        movl $-1, PT_ORIG_EAX(%esp)     # no syscall to restart
1312        REG_TO_PTGS %ecx
1313        SET_KERNEL_GS %ecx
1314        movl $(__USER_DS), %ecx
1315        movl %ecx, %ds
1316        movl %ecx, %es
1317        TRACE_IRQS_OFF
1318        movl %esp,%eax                  # pt_regs pointer
1319        call *%edi
1320        jmp ret_from_exception
1321        CFI_ENDPROC
1322END(page_fault)
1323
1324/*
1325 * Debug traps and NMI can happen at the one SYSENTER instruction
1326 * that sets up the real kernel stack. Check here, since we can't
1327 * allow the wrong stack to be used.
1328 *
1329 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1330 * already pushed 3 words if it hits on the sysenter instruction:
1331 * eflags, cs and eip.
1332 *
1333 * We just load the right stack, and push the three (known) values
1334 * by hand onto the new stack - while updating the return eip past
1335 * the instruction that would have done it for sysenter.
1336 */
1337.macro FIX_STACK offset ok label
1338        cmpw $__KERNEL_CS, 4(%esp)
1339        jne \ok
1340\label:
1341        movl TSS_sysenter_sp0 + \offset(%esp), %esp
1342        CFI_DEF_CFA esp, 0
1343        CFI_UNDEFINED eip
1344        pushfl
1345        CFI_ADJUST_CFA_OFFSET 4
1346        pushl $__KERNEL_CS
1347        CFI_ADJUST_CFA_OFFSET 4
1348        pushl $sysenter_past_esp
1349        CFI_ADJUST_CFA_OFFSET 4
1350        CFI_REL_OFFSET eip, 0
1351.endm
1352
1353ENTRY(debug)
1354        RING0_INT_FRAME
1355        cmpl $ia32_sysenter_target,(%esp)
1356        jne debug_stack_correct
1357        FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1358debug_stack_correct:
1359        pushl $-1                       # mark this as an int
1360        CFI_ADJUST_CFA_OFFSET 4
1361        SAVE_ALL
1362        TRACE_IRQS_OFF
1363        xorl %edx,%edx                  # error code 0
1364        movl %esp,%eax                  # pt_regs pointer
1365        call do_debug
1366        jmp ret_from_exception
1367        CFI_ENDPROC
1368END(debug)
1369
1370/*
1371 * NMI is doubly nasty. It can happen _while_ we're handling
1372 * a debug fault, and the debug fault hasn't yet been able to
 1373 * clear up the stack. So we first check whether we got an
1374 * NMI on the sysenter entry path, but after that we need to
1375 * check whether we got an NMI on the debug path where the debug
1376 * fault happened on the sysenter path.
1377 */
1378ENTRY(nmi)
1379        RING0_INT_FRAME
1380        pushl %eax
1381        CFI_ADJUST_CFA_OFFSET 4
1382        movl %ss, %eax
1383        cmpw $__ESPFIX_SS, %ax
1384        popl %eax
1385        CFI_ADJUST_CFA_OFFSET -4
1386        je nmi_espfix_stack
1387        cmpl $ia32_sysenter_target,(%esp)
1388        je nmi_stack_fixup
1389        pushl %eax
1390        CFI_ADJUST_CFA_OFFSET 4
1391        movl %esp,%eax
1392        /* Do not access memory above the end of our stack page,
1393         * it might not exist.
1394         */
1395        andl $(THREAD_SIZE-1),%eax
1396        cmpl $(THREAD_SIZE-20),%eax
1397        popl %eax
1398        CFI_ADJUST_CFA_OFFSET -4
1399        jae nmi_stack_correct
1400        cmpl $ia32_sysenter_target,12(%esp)
1401        je nmi_debug_stack_check
1402nmi_stack_correct:
1403        /* We have a RING0_INT_FRAME here */
1404        pushl %eax
1405        CFI_ADJUST_CFA_OFFSET 4
1406        SAVE_ALL
1407        xorl %edx,%edx          # zero error code
1408        movl %esp,%eax          # pt_regs pointer
1409        call do_nmi
1410        jmp restore_all_notrace
1411        CFI_ENDPROC
1412
1413nmi_stack_fixup:
1414        RING0_INT_FRAME
1415        FIX_STACK 12, nmi_stack_correct, 1
1416        jmp nmi_stack_correct
1417
1418nmi_debug_stack_check:
1419        /* We have a RING0_INT_FRAME here */
1420        cmpw $__KERNEL_CS,16(%esp)
1421        jne nmi_stack_correct
1422        cmpl $debug,(%esp)
1423        jb nmi_stack_correct
1424        cmpl $debug_esp_fix_insn,(%esp)
1425        ja nmi_stack_correct
1426        FIX_STACK 24, nmi_stack_correct, 1
1427        jmp nmi_stack_correct
1428
1429nmi_espfix_stack:
1430        /* We have a RING0_INT_FRAME here.
1431         *
 1432         * Create the %ss:%esp pointer that the lss below uses to switch back.
1433         */
1434        pushl %ss
1435        CFI_ADJUST_CFA_OFFSET 4
1436        pushl %esp
1437        CFI_ADJUST_CFA_OFFSET 4
1438        addl $4, (%esp)
1439        /* copy the iret frame of 12 bytes */
1440        .rept 3
1441        pushl 16(%esp)
1442        CFI_ADJUST_CFA_OFFSET 4
1443        .endr
1444        pushl %eax
1445        CFI_ADJUST_CFA_OFFSET 4
1446        SAVE_ALL
1447        FIXUP_ESPFIX_STACK              # %eax == %esp
1448        xorl %edx,%edx                  # zero error code
1449        call do_nmi
1450        RESTORE_REGS
1451        lss 12+4(%esp), %esp            # back to espfix stack
1452        CFI_ADJUST_CFA_OFFSET -24
1453        jmp irq_return
1454        CFI_ENDPROC
1455END(nmi)
1456
1457ENTRY(int3)
1458        RING0_INT_FRAME
1459        pushl $-1                       # mark this as an int
1460        CFI_ADJUST_CFA_OFFSET 4
1461        SAVE_ALL
1462        TRACE_IRQS_OFF
1463        xorl %edx,%edx          # zero error code
1464        movl %esp,%eax          # pt_regs pointer
1465        call do_int3
1466        jmp ret_from_exception
1467        CFI_ENDPROC
1468END(int3)
1469
1470ENTRY(general_protection)
1471        RING0_EC_FRAME
1472        pushl $do_general_protection
1473        CFI_ADJUST_CFA_OFFSET 4
1474        jmp error_code
1475        CFI_ENDPROC
1476END(general_protection)
1477
1478/*
1479 * End of kprobes section
1480 */
1481        .popsection
1482