linux/arch/x86/kernel/entry_32.S
<<
>>
Prefs
   1/*
   2 *
   3 *  Copyright (C) 1991, 1992  Linus Torvalds
   4 */
   5
   6/*
   7 * entry.S contains the system-call and fault low-level handling routines.
   8 * This also contains the timer-interrupt handler, as well as all interrupts
   9 * and faults that can result in a task-switch.
  10 *
  11 * NOTE: This code handles signal-recognition, which happens every time
  12 * after a timer-interrupt and after each system call.
  13 *
  14 * I changed all the .align's to 4 (16 byte alignment), as that's faster
  15 * on a 486.
  16 *
  17 * Stack layout in 'syscall_exit':
  18 *      ptrace needs to have all regs on the stack.
  19 *      if the order here is changed, it needs to be
  20 *      updated in fork.c:copy_process, signal.c:do_signal,
  21 *      ptrace.c and ptrace.h
  22 *
  23 *       0(%esp) - %ebx
  24 *       4(%esp) - %ecx
  25 *       8(%esp) - %edx
  26 *       C(%esp) - %esi
  27 *      10(%esp) - %edi
  28 *      14(%esp) - %ebp
  29 *      18(%esp) - %eax
  30 *      1C(%esp) - %ds
  31 *      20(%esp) - %es
  32 *      24(%esp) - %fs
  33 *      28(%esp) - orig_eax
  34 *      2C(%esp) - %eip
  35 *      30(%esp) - %cs
  36 *      34(%esp) - %eflags
  37 *      38(%esp) - %oldesp
  38 *      3C(%esp) - %oldss
  39 *
  40 * "current" is in register %ebx during any slow entries.
  41 */
  42
  43#include <linux/linkage.h>
  44#include <asm/thread_info.h>
  45#include <asm/irqflags.h>
  46#include <asm/errno.h>
  47#include <asm/segment.h>
  48#include <asm/smp.h>
  49#include <asm/page.h>
  50#include <asm/desc.h>
  51#include <asm/percpu.h>
  52#include <asm/dwarf2.h>
  53#include <asm/processor-flags.h>
  54#include <asm/ftrace.h>
  55#include <asm/irq_vectors.h>
  56
  57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  58#include <linux/elf-em.h>
   59#define AUDIT_ARCH_I386         (EM_386|__AUDIT_ARCH_LE)
   60#define __AUDIT_ARCH_LE    0x40000000
     /*
      * AUDIT_ARCH_I386 names __AUDIT_ARCH_LE one line before it is defined.
      * That is harmless: the preprocessor expands macros where they are used
      * (here: in the sysenter_audit path), not where they are defined.
      */
   61
     /*
      * sysenter_audit and sysexit_audit are only assembled under
      * CONFIG_AUDITSYSCALL.  Without it, alias the two names so that the jumps
      * in the sysenter path go to the generic trace-entry / exit-work code.
      */
   62#ifndef CONFIG_AUDITSYSCALL
   63#define sysenter_audit  syscall_trace_entry
   64#define sysexit_audit   syscall_exit_work
   65#endif
  66
  67/*
  68 * We use macros for low-level operations which need to be overridden
  69 * for paravirtualization.  The following will never clobber any registers:
  70 *   INTERRUPT_RETURN (aka. "iret")
  71 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  72 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  73 *
  74 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  75 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  76 * Allowing a register to be clobbered can shrink the paravirt replacement
  77 * enough to patch inline, increasing performance.
  78 */
  79
   80#define nr_syscalls ((syscall_table_size)/4)
     /*
      * nr_syscalls: syscall_table_size (defined outside this part of the file)
      * divided by 4, the same entry size used to index the table in
      * "call *sys_call_table(,%eax,4)".
      */
   81
     /*
      * preempt_stop(clobbers): with CONFIG_PREEMPT, disable interrupts and tell
      * the irq-flags tracer before the return-path checks run.  Without it the
      * macro expands to nothing and, because the resume_kernel routine is only
      * assembled under CONFIG_PREEMPT, resume_kernel becomes an alias for
      * restore_nocheck.
      */
   82#ifdef CONFIG_PREEMPT
   83#define preempt_stop(clobbers)  DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
   84#else
   85#define preempt_stop(clobbers)
   86#define resume_kernel           restore_nocheck
   87#endif
  88
   89.macro TRACE_IRQS_IRET
     /*
      * Used right before returning through the saved frame.  With
      * CONFIG_TRACE_IRQFLAGS, emit TRACE_IRQS_ON when the saved EFLAGS image
      * at PT_EFLAGS(%esp) has IF set, i.e. the context being returned to runs
      * with interrupts enabled.  Expands to nothing otherwise.
      * Modifies the flags (testl); uses local label 1.
      */
   90#ifdef CONFIG_TRACE_IRQFLAGS
   91        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
   92        jz 1f
   93        TRACE_IRQS_ON
   941:
   95#endif
   96.endm
  97
   98#ifdef CONFIG_VM86
     /*
      * resume_userspace_sig is the label work_notifysig jumps to after
      * do_notify_resume returns.  With vm86 support it goes back through
      * check_userspace, which re-tests the saved EFLAGS.VM / CS bits; without
      * it the path goes directly to resume_userspace.
      */
   99#define resume_userspace_sig    check_userspace
  100#else
  101#define resume_userspace_sig    resume_userspace
  102#endif
 103
  104#define SAVE_ALL \
             /* Build the register part of the frame listed in the header   */ \
             /* comment: clear DF, then push %fs, %es, %ds, %eax, %ebp,     */ \
             /* %edi, %esi, %edx, %ecx, %ebx so that %ebx ends at 0(%esp).  */ \
             /* Every push is mirrored by a CFI adjustment.  Afterwards     */ \
             /* %ds/%es are loaded with __USER_DS and %fs with              */ \
             /* __KERNEL_PERCPU; %edx is the scratch register for that      */ \
             /* (its incoming value is already on the stack).               */ \
  105        cld; \
  106        pushl %fs; \
  107        CFI_ADJUST_CFA_OFFSET 4;\
  108        /*CFI_REL_OFFSET fs, 0;*/\
  109        pushl %es; \
  110        CFI_ADJUST_CFA_OFFSET 4;\
  111        /*CFI_REL_OFFSET es, 0;*/\
  112        pushl %ds; \
  113        CFI_ADJUST_CFA_OFFSET 4;\
  114        /*CFI_REL_OFFSET ds, 0;*/\
  115        pushl %eax; \
  116        CFI_ADJUST_CFA_OFFSET 4;\
  117        CFI_REL_OFFSET eax, 0;\
  118        pushl %ebp; \
  119        CFI_ADJUST_CFA_OFFSET 4;\
  120        CFI_REL_OFFSET ebp, 0;\
  121        pushl %edi; \
  122        CFI_ADJUST_CFA_OFFSET 4;\
  123        CFI_REL_OFFSET edi, 0;\
  124        pushl %esi; \
  125        CFI_ADJUST_CFA_OFFSET 4;\
  126        CFI_REL_OFFSET esi, 0;\
  127        pushl %edx; \
  128        CFI_ADJUST_CFA_OFFSET 4;\
  129        CFI_REL_OFFSET edx, 0;\
  130        pushl %ecx; \
  131        CFI_ADJUST_CFA_OFFSET 4;\
  132        CFI_REL_OFFSET ecx, 0;\
  133        pushl %ebx; \
  134        CFI_ADJUST_CFA_OFFSET 4;\
  135        CFI_REL_OFFSET ebx, 0;\
             /* all registers saved: switch to the kernel's segment setup */ \
  136        movl $(__USER_DS), %edx; \
  137        movl %edx, %ds; \
  138        movl %edx, %es; \
  139        movl $(__KERNEL_PERCPU), %edx; \
  140        movl %edx, %fs
 141
  142#define RESTORE_INT_REGS \
             /* Undo the integer-register half of SAVE_ALL: pop %ebx, %ecx, */ \
             /* %edx, %esi, %edi, %ebp and %eax in stack order, keeping the */ \
             /* CFI offset and per-register state in step with each pop.    */ \
             /* The saved segment registers stay on the stack.              */ \
  143        popl %ebx;      \
  144        CFI_ADJUST_CFA_OFFSET -4;\
  145        CFI_RESTORE ebx;\
  146        popl %ecx;      \
  147        CFI_ADJUST_CFA_OFFSET -4;\
  148        CFI_RESTORE ecx;\
  149        popl %edx;      \
  150        CFI_ADJUST_CFA_OFFSET -4;\
  151        CFI_RESTORE edx;\
  152        popl %esi;      \
  153        CFI_ADJUST_CFA_OFFSET -4;\
  154        CFI_RESTORE esi;\
  155        popl %edi;      \
  156        CFI_ADJUST_CFA_OFFSET -4;\
  157        CFI_RESTORE edi;\
  158        popl %ebp;      \
  159        CFI_ADJUST_CFA_OFFSET -4;\
  160        CFI_RESTORE ebp;\
  161        popl %eax;      \
  162        CFI_ADJUST_CFA_OFFSET -4;\
  163        CFI_RESTORE eax
 164
  165#define RESTORE_REGS    \
             /* Pop the integer registers, then %ds, %es and %fs.  Each     */ \
             /* segment pop has a numeric label (1, 2, 3) and an __ex_table */ \
             /* entry pairing it with a .fixup stub (4, 5, 6).  The stub    */ \
             /* writes 0 into the stack slot about to be popped and jumps   */ \
             /* back to retry the pop.  orig_eax and the return frame are   */ \
             /* left on the stack for the caller to deal with.              */ \
  166        RESTORE_INT_REGS; \
  1671:      popl %ds;       \
  168        CFI_ADJUST_CFA_OFFSET -4;\
  169        /*CFI_RESTORE ds;*/\
  1702:      popl %es;       \
  171        CFI_ADJUST_CFA_OFFSET -4;\
  172        /*CFI_RESTORE es;*/\
  1733:      popl %fs;       \
  174        CFI_ADJUST_CFA_OFFSET -4;\
  175        /*CFI_RESTORE fs;*/\
  176.pushsection .fixup,"ax";       \
  1774:      movl $0,(%esp); \
  178        jmp 1b;         \
  1795:      movl $0,(%esp); \
  180        jmp 2b;         \
  1816:      movl $0,(%esp); \
  182        jmp 3b;         \
  183.section __ex_table,"a";\
  184        .align 4;       \
             /* pairs of (instruction that may fault, fixup address) */ \
  185        .long 1b,4b;    \
  186        .long 2b,5b;    \
  187        .long 3b,6b;    \
  188.popsection
 189
  190#define RING0_INT_FRAME \
             /* Open a CFI signal-frame region for an entry point that has  */ \
             /* three words on the stack: CFA = %esp + 3*4, with the return */ \
             /* %eip stored at CFA - 3*4, i.e. at 0(%esp).                  */ \
  191        CFI_STARTPROC simple;\
  192        CFI_SIGNAL_FRAME;\
  193        CFI_DEF_CFA esp, 3*4;\
  194        /*CFI_OFFSET cs, -2*4;*/\
  195        CFI_OFFSET eip, -3*4
 196
  197#define RING0_EC_FRAME \
             /* Like RING0_INT_FRAME but with four words on the stack:      */ \
             /* CFA = %esp + 4*4 and %eip at CFA - 3*4, i.e. at 4(%esp).    */ \
             /* The extra word at 0(%esp) is presumably the error code, as  */ \
             /* the EC in the name suggests -- confirm against the users.   */ \
  198        CFI_STARTPROC simple;\
  199        CFI_SIGNAL_FRAME;\
  200        CFI_DEF_CFA esp, 4*4;\
  201        /*CFI_OFFSET cs, -2*4;*/\
  202        CFI_OFFSET eip, -3*4
 203
  204#define RING0_PTREGS_FRAME \
             /* Open a CFI signal-frame region for code that runs with a    */ \
             /* register frame on the stack starting at PT_EBX: the CFA is  */ \
             /* %esp + (PT_OLDESP - PT_EBX) and every saved register is     */ \
             /* described by its PT_* offset relative to PT_OLDESP.         */ \
  205        CFI_STARTPROC simple;\
  206        CFI_SIGNAL_FRAME;\
  207        CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
  208        /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
  209        CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
  210        /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
  211        /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
  212        CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
  213        CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
  214        CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
  215        CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
  216        CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
  217        CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
  218        CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 219
  220ENTRY(ret_from_fork)
             /*
              * Calls schedule_tail, then joins the common system-call return
              * path at syscall_exit.  %eax is pushed before and popped after the
              * call so its value survives it (presumably it is also the argument
              * schedule_tail receives -- confirm against the C prototype).
              * %ebp is loaded with GET_THREAD_INFO because syscall_exit reads
              * TI_flags(%ebp).
              */
  221        CFI_STARTPROC
  222        pushl %eax
  223        CFI_ADJUST_CFA_OFFSET 4
  224        call schedule_tail
  225        GET_THREAD_INFO(%ebp)
  226        popl %eax
  227        CFI_ADJUST_CFA_OFFSET -4
             /* 0x0202 = EFLAGS with only IF (0x200) and the always-set bit 1 */
  228        pushl $0x0202                   # Reset kernel eflags
  229        CFI_ADJUST_CFA_OFFSET 4
  230        popfl
  231        CFI_ADJUST_CFA_OFFSET -4
  232        jmp syscall_exit
  233        CFI_ENDPROC
  234END(ret_from_fork)
 235
 236/*
 237 * Return to user mode is not as complex as all this looks,
 238 * but we want the default path for a system call return to
 239 * go as quickly as possible which is why some of this is
 240 * less clear than it otherwise should be.
 241 */
 242
  243        # userspace resumption stub bypassing syscall exit tracing
             /*
              * Common tail for exception and interrupt handlers.  A full register
              * frame is on the stack (RING0_PTREGS_FRAME).
              *   ret_from_exception: jumped to by the exception paths; runs
              *                       preempt_stop first.
              *   ret_from_intr:      jumped to by the interrupt paths.
              *   check_userspace:    picks resume_kernel or resume_userspace
              *                       from the saved EFLAGS and CS.
              * The CFI region opened here is closed by the CFI_ENDPROC that
              * follows the CONFIG_PREEMPT resume_kernel block.
              */
  244        ALIGN
  245        RING0_PTREGS_FRAME
  246ret_from_exception:
  247        preempt_stop(CLBR_ANY)
  248ret_from_intr:
  249        GET_THREAD_INFO(%ebp)
  250check_userspace:
             /*
              * %eax = saved EFLAGS with its low byte replaced by the low byte of
              * the saved CS.  After masking, only the VM flag and the CS privilege
              * bits remain, so an (unsigned) value below USER_RPL means the frame
              * returns to neither vm86 nor user mode.
              */
  251        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS and CS
  252        movb PT_CS(%esp), %al
  253        andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
  254        cmpl $USER_RPL, %eax
  255        jb resume_kernel                # not returning to v8086 or userspace
  256
  257ENTRY(resume_userspace)
  258        LOCKDEP_SYS_EXIT
  259        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
  260                                        # setting need_resched or sigpending
  261                                        # between sampling and the iret
  262        TRACE_IRQS_OFF
  263        movl TI_flags(%ebp), %ecx
  264        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done on
  265                                        # int/exception return?
             /* work_pending expects the masked flags in %ecx */
  266        jne work_pending
  267        jmp restore_all
  268END(ret_from_exception)
 269
  270#ifdef CONFIG_PREEMPT
  271ENTRY(resume_kernel)
             /*
              * Return to kernel mode on a preemptible kernel.  Expects %ebp to
              * hold the value loaded by GET_THREAD_INFO (set up at ret_from_intr).
              * Preemption is attempted only when preempt_count is zero,
              * _TIF_NEED_RESCHED is set and the interrupted context had
              * interrupts enabled; the checks from need_resched onwards are
              * repeated after every preempt_schedule_irq call.
              */
  272        DISABLE_INTERRUPTS(CLBR_ANY)
  273        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
  274        jnz restore_nocheck
  275need_resched:
  276        movl TI_flags(%ebp), %ecx       # need_resched set ?
  277        testb $_TIF_NEED_RESCHED, %cl
  278        jz restore_all
  279        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
  280        jz restore_all
  281        call preempt_schedule_irq
  282        jmp need_resched
  283END(resume_kernel)
  284#endif
             /* closes the RING0_PTREGS_FRAME region opened before ret_from_exception */
  285        CFI_ENDPROC
 286
 287/* SYSENTER_RETURN points to after the "sysenter" instruction in
 288   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 289
  290        # sysenter call handler stub
  291ENTRY(ia32_sysenter_target)
             /*
              * SYSENTER entry point.  Switches to the stack pointer read from
              * TSS_sysenter_sp0(%esp), then hand-builds the ss/esp/eflags/cs/eip
              * part of the frame: the user %esp is taken from %ebp (see the
              * CFI_REGISTER annotation) and the return %eip from the
              * sysenter_return field of thread_info.  After SAVE_ALL it loads the
              * sixth argument from the user stack, dispatches through
              * sys_call_table and, when no work flags are set, leaves through
              * the fast path at sysenter_exit.
              */
  292        CFI_STARTPROC simple
  293        CFI_SIGNAL_FRAME
  294        CFI_DEF_CFA esp, 0
  295        CFI_REGISTER esp, ebp
  296        movl TSS_sysenter_sp0(%esp),%esp
  297sysenter_past_esp:
  298        /*
  299         * Interrupts are disabled here, but we can't tell the tracer until
  300         * enough kernel state is set up to call TRACE_IRQS_OFF - and by
  301         * then we immediately enable interrupts anyway.
  302         */
  303        pushl $(__USER_DS)
  304        CFI_ADJUST_CFA_OFFSET 4
  305        /*CFI_REL_OFFSET ss, 0*/
  306        pushl %ebp
  307        CFI_ADJUST_CFA_OFFSET 4
  308        CFI_REL_OFFSET esp, 0
  309        pushfl
             /* the saved EFLAGS image gets IF set */
  310        orl $X86_EFLAGS_IF, (%esp)
  311        CFI_ADJUST_CFA_OFFSET 4
  312        pushl $(__USER_CS)
  313        CFI_ADJUST_CFA_OFFSET 4
  314        /*CFI_REL_OFFSET cs, 0*/
  315        /*
  316         * Push current_thread_info()->sysenter_return to the stack.
  317         * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
  318         * pushed above; +8 corresponds to copy_thread's esp0 setting.
  319         */
  320        pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
  321        CFI_ADJUST_CFA_OFFSET 4
  322        CFI_REL_OFFSET eip, 0
  323
             /* orig_eax slot: the syscall number */
  324        pushl %eax
  325        CFI_ADJUST_CFA_OFFSET 4
  326        SAVE_ALL
  327        ENABLE_INTERRUPTS(CLBR_NONE)
  328
  329/*
  330 * Load the potential sixth argument from user stack.
  331 * Careful about security.
  332 */
             /*
              * %ebp still holds the user stack pointer.  Refuse any pointer at or
              * above __PAGE_OFFSET-3, i.e. one whose 4-byte load would reach
              * __PAGE_OFFSET.  A fault on the load itself (label 1) is routed to
              * syscall_fault through the __ex_table entry below.
              */
  333        cmpl $__PAGE_OFFSET-3,%ebp
  334        jae syscall_fault
  3351:      movl (%ebp),%ebp
  336        movl %ebp,PT_EBP(%esp)
  337.section __ex_table,"a"
  338        .align 4
  339        .long 1b,syscall_fault
  340.previous
  341
  342        GET_THREAD_INFO(%ebp)
  343
  344        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
  345        testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
  346        jnz sysenter_audit
  347sysenter_do_call:
             /* unsigned compare: any number at or above nr_syscalls is rejected */
  348        cmpl $(nr_syscalls), %eax
  349        jae syscall_badsys
  350        call *sys_call_table(,%eax,4)
  351        movl %eax,PT_EAX(%esp)
  352        LOCKDEP_SYS_EXIT
  353        DISABLE_INTERRUPTS(CLBR_ANY)
  354        TRACE_IRQS_OFF
  355        movl TI_flags(%ebp), %ecx
  356        testw $_TIF_ALLWORK_MASK, %cx
  357        jne sysexit_audit
  358sysenter_exit:
  359/* if something modifies registers it must also disable sysexit */
             /*
              * SYSEXIT takes the user %eip from %edx and the user %esp from %ecx.
              * Only %fs is reloaded from the frame here; a fault on that load
              * (label 1) is handled by the .fixup entry placed after this routine.
              */
  360        movl PT_EIP(%esp), %edx
  361        movl PT_OLDESP(%esp), %ecx
  362        xorl %ebp,%ebp
  363        TRACE_IRQS_ON
  3641:      mov  PT_FS(%esp), %fs
  365        ENABLE_INTERRUPTS_SYSEXIT
 366
  367#ifdef CONFIG_AUDITSYSCALL
  368sysenter_audit:
             /*
              * Reached from the sysenter path when a _TIF_WORK_SYSCALL_ENTRY flag
              * is set.  If any flag other than _TIF_SYSCALL_AUDIT is involved,
              * take the generic syscall_trace_entry path.  Otherwise call
              * audit_syscall_entry directly: the saved %ebx slot is dropped so
              * the saved %ecx/%edx/%esi slots sit at 0/4/8(%esp) as the stack
              * arguments, the first three arguments go in %eax/%edx/%ecx, and
              * %ebx is pushed back afterwards to rebuild the frame.
              */
  369        testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
  370        jnz syscall_trace_entry
  371        addl $4,%esp
  372        CFI_ADJUST_CFA_OFFSET -4
  373        /* %esi already in 8(%esp)         6th arg: 4th syscall arg */
  374        /* %edx already in 4(%esp)         5th arg: 3rd syscall arg */
  375        /* %ecx already in 0(%esp)         4th arg: 2nd syscall arg */
  376        movl %ebx,%ecx                  /* 3rd arg: 1st syscall arg */
  377        movl %eax,%edx                  /* 2nd arg: syscall number */
  378        movl $AUDIT_ARCH_I386,%eax      /* 1st arg: audit arch */
  379        call audit_syscall_entry
             /* %ebx is expected to come back unchanged from the call -- TODO confirm */
  380        pushl %ebx
  381        CFI_ADJUST_CFA_OFFSET 4
  382        movl PT_EAX(%esp),%eax          /* reload syscall number */
  383        jmp sysenter_do_call
  384
  385sysexit_audit:
             /*
              * Reached from the sysenter exit path with TI_flags in %ecx and the
              * syscall return value in %eax.  If any flag other than
              * _TIF_SYSCALL_AUDIT needs work, go to syscall_exit_work.  Otherwise
              * call audit_syscall_exit with interrupts enabled, then re-check the
              * flags with interrupts disabled before taking the fast exit.
              */
  386        testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
  387        jne syscall_exit_work
  388        TRACE_IRQS_ON
  389        ENABLE_INTERRUPTS(CLBR_ANY)
  390        movl %eax,%edx          /* second arg, syscall return value */
  391        cmpl $0,%eax            /* is it < 0? */
  392        setl %al                /* 1 if so, 0 if not */
  393        movzbl %al,%eax         /* zero-extend that */
  394        inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  395        call audit_syscall_exit
  396        DISABLE_INTERRUPTS(CLBR_ANY)
  397        TRACE_IRQS_OFF
  398        movl TI_flags(%ebp), %ecx
  399        testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
  400        jne syscall_exit_work
  401        movl PT_EAX(%esp),%eax  /* reload syscall return value */
  402        jmp sysenter_exit
  403#endif
 404
  405        CFI_ENDPROC
             /*
              * Fixup for the "mov PT_FS(%esp), %fs" at label 1 in sysenter_exit:
              * if that load faults, store 0 in the saved %fs slot and retry it.
              */
  406.pushsection .fixup,"ax"
  4072:      movl $0,PT_FS(%esp)
  408        jmp 1b
  409.section __ex_table,"a"
  410        .align 4
  411        .long 1b,2b
  412.popsection
  413ENDPROC(ia32_sysenter_target)
 414
  415        # system call handler stub
  416ENTRY(system_call)
             /*
              * System-call entry that starts from a three-word frame
              * (RING0_INT_FRAME).  Saves the syscall number as orig_eax, builds
              * the register frame with SAVE_ALL, diverts to syscall_trace_entry if
              * any _TIF_WORK_SYSCALL_ENTRY flag is set, range-checks %eax
              * (unsigned) against nr_syscalls and calls the handler through
              * sys_call_table.  syscall_exit then checks for pending work with
              * interrupts disabled and falls through into restore_all when there
              * is none.
              */
  417        RING0_INT_FRAME                 # can't unwind into user space anyway
  418        pushl %eax                      # save orig_eax
  419        CFI_ADJUST_CFA_OFFSET 4
  420        SAVE_ALL
  421        GET_THREAD_INFO(%ebp)
  422                                        # system call tracing in operation / emulation
  423        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
  424        testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
  425        jnz syscall_trace_entry
  426        cmpl $(nr_syscalls), %eax
  427        jae syscall_badsys
  428syscall_call:
  429        call *sys_call_table(,%eax,4)
  430        movl %eax,PT_EAX(%esp)          # store the return value
  431syscall_exit:
  432        LOCKDEP_SYS_EXIT
  433        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
  434                                        # setting need_resched or sigpending
  435                                        # between sampling and the iret
  436        TRACE_IRQS_OFF
  437        movl TI_flags(%ebp), %ecx
  438        testw $_TIF_ALLWORK_MASK, %cx   # current->work
  439        jne syscall_exit_work
 440
  441restore_all:
             /*
              * Final return path.  First decide whether this is a return to user
              * mode with an LDT stack segment: %eax = saved EFLAGS with %ah
              * replaced by the low byte of the saved SS and %al by the low byte
              * of the saved CS.  After masking, equality with
              * (SEGMENT_LDT << 8) | USER_RPL means VM is clear, SS has the LDT
              * table-indicator bit and CS has user RPL; that case goes to ldt_ss.
              * Everything else restores the registers, skips the orig_eax slot
              * and executes INTERRUPT_RETURN.
              */
  442        movl PT_EFLAGS(%esp), %eax      # mix EFLAGS, SS and CS
  443        # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
  444        # are returning to the kernel.
  445        # See comments in process.c:copy_thread() for details.
  446        movb PT_OLDSS(%esp), %ah
  447        movb PT_CS(%esp), %al
  448        andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
  449        cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
             /* remember the CFI state here; ldt_ss restores it */
  450        CFI_REMEMBER_STATE
  451        je ldt_ss                       # returning to user-space with LDT SS
  452restore_nocheck:
  453        TRACE_IRQS_IRET
  454restore_nocheck_notrace:
  455        RESTORE_REGS
  456        addl $4, %esp                   # skip orig_eax/error_code
  457        CFI_ADJUST_CFA_OFFSET -4
  458irq_return:
  459        INTERRUPT_RETURN
             /*
              * A fault at irq_return is routed (via __ex_table) to iret_exc, which
              * pushes a zero error code and the address of do_iret_error and
              * joins the common error_code path.
              */
  460.section .fixup,"ax"
  461ENTRY(iret_exc)
  462        pushl $0                        # no error code
  463        pushl $do_iret_error
  464        jmp error_code
  465.previous
  466.section __ex_table,"a"
  467        .align 4
  468        .long irq_return,iret_exc
  469.previous
 470
  471        CFI_RESTORE_STATE
  472ldt_ss:
             /*
              * Reached from restore_all when returning to user mode with an LDT
              * stack segment.  larl fetches the access rights of the saved SS
              * selector and leaves ZF clear on failure, in which case the normal
              * return is used.
              */
  473        larl PT_OLDSS(%esp), %eax
  474        jnz restore_nocheck
  475        testl $0x00400000, %eax         # returning to 32bit stack?
  476        jnz restore_nocheck             # all right, normal return
  477
  478#ifdef CONFIG_PARAVIRT
  479        /*
  480         * The kernel can't run on a non-flat stack if paravirt mode
  481         * is active.  Rather than try to fixup the high bits of
  482         * ESP, bypass this code entirely.  This may break DOSemu
  483         * and/or Wine support in a paravirt VM, although the option
  484         * is still available to implement the setting of the high
  485         * 16-bits in the INTERRUPT_RETURN paravirt-op.
  486         */
  487        cmpl $0, pv_info+PARAVIRT_enabled
  488        jne restore_nocheck
  489#endif
  490
  491        /* If returning to userspace with 16bit stack,
  492         * try to fix the higher word of ESP, as the CPU
  493         * won't restore it.
  494         * This is an "official" bug of all the x86-compatible
  495         * CPUs, which we can try to work around to make
  496         * dosemu and wine happy. */
             /*
              * patch_espfix_desc is called with the saved user %esp in %eax and
              * the current kernel %esp in %edx.  Its return value (in %eax) is
              * pushed as the new stack pointer below the __ESPFIX_SS selector, and
              * lss then loads %ss:%esp from that pair in one instruction, with
              * interrupts disabled beforehand.
              */
  497        movl PT_OLDESP(%esp), %eax
  498        movl %esp, %edx
  499        call patch_espfix_desc
  500        pushl $__ESPFIX_SS
  501        CFI_ADJUST_CFA_OFFSET 4
  502        pushl %eax
  503        CFI_ADJUST_CFA_OFFSET 4
  504        DISABLE_INTERRUPTS(CLBR_EAX)
  505        TRACE_IRQS_OFF
  506        lss (%esp), %esp
  507        CFI_ADJUST_CFA_OFFSET -8
  508        jmp restore_nocheck
  509        CFI_ENDPROC
  510ENDPROC(system_call)
 511
  512        # perform work that needs to be done immediately before resumption
             /*
              * Entered with the thread flags in %ecx (the callers load them from
              * TI_flags(%ebp) before jumping here).
              *   work_resched:   call schedule, then re-sample the flags with
              *                   interrupts disabled; loop while _TIF_NEED_RESCHED
              *                   stays set, leave via restore_all when no
              *                   _TIF_WORK_MASK flag remains.
              *   work_notifysig: call do_notify_resume with the register frame
              *                   pointer in %eax and 0 in %edx (%ecx still holds
              *                   the flags), then continue at
              *                   resume_userspace_sig.
              */
  513        ALIGN
  514        RING0_PTREGS_FRAME              # can't unwind into user space anyway
  515work_pending:
  516        testb $_TIF_NEED_RESCHED, %cl
  517        jz work_notifysig
  518work_resched:
  519        call schedule
  520        LOCKDEP_SYS_EXIT
  521        DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
  522                                        # setting need_resched or sigpending
  523                                        # between sampling and the iret
  524        TRACE_IRQS_OFF
  525        movl TI_flags(%ebp), %ecx
  526        andl $_TIF_WORK_MASK, %ecx      # is there any work to be done other
  527                                        # than syscall tracing?
  528        jz restore_all
  529        testb $_TIF_NEED_RESCHED, %cl
  530        jnz work_resched
  531
  532work_notifysig:                         # deal with pending signals and
  533                                        # notify-resume requests
  534#ifdef CONFIG_VM86
             /* the movl between testl and jne does not modify the flags */
  535        testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
  536        movl %esp, %eax
  537        jne work_notifysig_v86          # returning to kernel-space or
  538                                        # vm86-space
  539        xorl %edx, %edx
  540        call do_notify_resume
  541        jmp resume_userspace_sig
  542
  543        ALIGN
  544work_notifysig_v86:
             /*
              * vm86 case: save_v86_state returns a register-frame pointer in
              * %eax, which becomes the new %esp before do_notify_resume is
              * called.  %ecx (the flags) is preserved around the call.
              */
  545        pushl %ecx                      # save ti_flags for do_notify_resume
  546        CFI_ADJUST_CFA_OFFSET 4
  547        call save_v86_state             # %eax contains pt_regs pointer
  548        popl %ecx
  549        CFI_ADJUST_CFA_OFFSET -4
  550        movl %eax, %esp
  551#else
  552        movl %esp, %eax
  553#endif
  554        xorl %edx, %edx
  555        call do_notify_resume
  556        jmp resume_userspace_sig
  557END(work_pending)
 558
  559        # perform syscall entry tracing
  560        ALIGN
  561syscall_trace_entry:
             /*
              * Pre-load -ENOSYS into the saved %eax slot, then call
              * syscall_trace_enter with the register frame pointer in %eax.  The
              * value it returns is used as the syscall number: if it is below
              * nr_syscalls (unsigned compare) the call is dispatched at
              * syscall_call; otherwise the dispatch is skipped and whatever is in
              * the saved %eax slot is returned through syscall_exit.
              */
  562        movl $-ENOSYS,PT_EAX(%esp)
  563        movl %esp, %eax
  564        call syscall_trace_enter
  565        /* What it returned is what we'll actually use.  */
  566        cmpl $(nr_syscalls), %eax
  567        jnae syscall_call
  568        jmp syscall_exit
  569END(syscall_trace_entry)
 570
  571        # perform syscall exit tracing
  572        ALIGN
  573syscall_exit_work:
             /*
              * Entered with the thread flags in %ecx.  If none of the
              * _TIF_WORK_SYSCALL_EXIT bits tested in %cl is set, this is ordinary
              * pending work.  Otherwise re-enable interrupts and call
              * syscall_trace_leave with the register frame pointer in %eax, then
              * go back through resume_userspace, which re-checks the flags.
              */
  574        testb $_TIF_WORK_SYSCALL_EXIT, %cl
  575        jz work_pending
  576        TRACE_IRQS_ON
  577        ENABLE_INTERRUPTS(CLBR_ANY)     # could let syscall_trace_leave() call
  578                                        # schedule() instead
  579        movl %esp, %eax
  580        call syscall_trace_leave
  581        jmp resume_userspace
  582END(syscall_exit_work)
             /* closes the RING0_PTREGS_FRAME region opened before work_pending */
  583        CFI_ENDPROC
 584
  585        RING0_INT_FRAME                 # can't unwind into user space anyway
             /*
              * Error exits for the system-call entry paths.  Both store an error
              * code in the saved %eax slot and return through resume_userspace.
              *   syscall_fault:  target of the sixth-argument checks in the
              *                   sysenter path; returns -EFAULT.  It reloads
              *                   %ebp with GET_THREAD_INFO because that path
              *                   uses %ebp for the user stack pointer.
              *   syscall_badsys: syscall number out of range; returns -ENOSYS.
              */
  586syscall_fault:
  587        GET_THREAD_INFO(%ebp)
  588        movl $-EFAULT,PT_EAX(%esp)
  589        jmp resume_userspace
  590END(syscall_fault)
  591
  592syscall_badsys:
  593        movl $-ENOSYS,PT_EAX(%esp)
  594        jmp resume_userspace
  595END(syscall_badsys)
  596        CFI_ENDPROC
 597
  598#define FIXUP_ESPFIX_STACK \
  599        /* since we are on a wrong stack, we cant make it a C code :( */ \
             /* Switch from the espfix stack to __KERNEL_DS: read the base  */ \
             /* of the GDT_ENTRY_ESPFIX_SS descriptor from this CPU's       */ \
             /* gdt_page into %eax, add the current %esp, then load         */ \
             /* %ss:%esp from a pushed (offset, selector) pair with lss.    */ \
             /* Uses %ebx and %eax as scratch; %eax ends up equal to the    */ \
             /* new %esp.                                                   */ \
  600        PER_CPU(gdt_page, %ebx); \
  601        GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
  602        addl %esp, %eax; \
  603        pushl $__KERNEL_DS; \
  604        CFI_ADJUST_CFA_OFFSET 4; \
  605        pushl %eax; \
  606        CFI_ADJUST_CFA_OFFSET 4; \
  607        lss (%esp), %esp; \
  608        CFI_ADJUST_CFA_OFFSET -8;
  609#define UNWIND_ESPFIX_STACK \
             /* If %ss is __ESPFIX_SS, load __KERNEL_DS into %ds/%es and    */ \
             /* switch to the normal stack with FIXUP_ESPFIX_STACK;         */ \
             /* otherwise skip to local label 27.  Overwrites %eax in both  */ \
             /* cases, and whatever FIXUP_ESPFIX_STACK uses when taken.     */ \
  610        movl %ss, %eax; \
  611        /* see if on espfix stack */ \
  612        cmpw $__ESPFIX_SS, %ax; \
  613        jne 27f; \
  614        movl $__KERNEL_DS, %eax; \
  615        movl %eax, %ds; \
  616        movl %eax, %es; \
  617        /* switch to normal stack */ \
  618        FIXUP_ESPFIX_STACK; \
  61927:;
 620
 621/*
 622 * Build the entry stubs and pointer table with
 623 * some assembler magic.
 624 */
  625.section .rodata,"a"
     /*
      * Two things are emitted in lock step, once per vector from 0 to
      * NR_VECTORS-1:
      *   - in .text, a stub that pushes ~vector and jumps to common_interrupt;
      *   - in .rodata, under the symbol "interrupt", a .long holding the
      *     address of that stub (label 1).
      * The .previous/.text pair inside the loop switches between the two
      * sections.  Every stub starts at the same stack depth, so for all but
      * the first one the +4 CFI adjustment recorded by the preceding stub is
      * undone first.
      */
  626ENTRY(interrupt)
  627.text
  628
  629ENTRY(irq_entries_start)
  630        RING0_INT_FRAME
  631vector=0
  632.rept NR_VECTORS
  633        ALIGN
  634 .if vector
  635        CFI_ADJUST_CFA_OFFSET -4
  636 .endif
  6371:      pushl $~(vector)
  638        CFI_ADJUST_CFA_OFFSET 4
  639        jmp common_interrupt
  640 .previous
  641        .long 1b
  642 .text
  643vector=vector+1
  644.endr
  645END(irq_entries_start)
  646
  647.previous
  648END(interrupt)
  649.previous
 650
 651/*
 652 * the CPU automatically disables interrupts when executing an IRQ vector,
 653 * so IRQ-flags tracing has to follow that:
 654 */
  655        ALIGN
  656common_interrupt:
             /*
              * Shared body of the per-vector stubs.  On entry the stub has already
              * pushed ~vector into what becomes the orig_eax slot.  Builds the
              * register frame, calls do_IRQ with the frame pointer in %eax and
              * returns through ret_from_intr.  The CFI_ENDPROC below closes the
              * region opened by RING0_INT_FRAME in irq_entries_start.
              */
  657        SAVE_ALL
  658        TRACE_IRQS_OFF
  659        movl %esp,%eax
  660        call do_IRQ
  661        jmp ret_from_intr
  662ENDPROC(common_interrupt)
  663        CFI_ENDPROC
 664
  665#define BUILD_INTERRUPT(name, nr)       \
     /* Emit an entry point "name" that pushes ~nr into the orig_eax slot,  */ \
     /* builds the register frame, calls smp_<name> with the frame pointer  */ \
     /* in %eax and returns through ret_from_intr.                          */ \
     /* NOTE(review): TRACE_IRQS_OFF is the only statement below without a  */ \
     /* trailing ';' -- this relies on the macro's expansion being empty or */ \
     /* ending in its own separator; confirm in asm/irqflags.h.             */ \
  666ENTRY(name)                             \
  667        RING0_INT_FRAME;                \
  668        pushl $~(nr);                   \
  669        CFI_ADJUST_CFA_OFFSET 4;        \
  670        SAVE_ALL;                       \
  671        TRACE_IRQS_OFF                  \
  672        movl %esp,%eax;                 \
  673        call smp_##name;                \
  674        jmp ret_from_intr;              \
  675        CFI_ENDPROC;                    \
  676ENDPROC(name)
 677
 678/* The include is where all of the SMP etc. interrupts come from */
 679#include "entry_arch.h"
 680
  681KPROBE_ENTRY(page_fault)
             /*
              * page_fault pushes the address of do_page_fault on top of a
              * four-word frame (RING0_EC_FRAME) and falls into error_code, the
              * path shared by the exception stubs in this file.
              *
              * error_code expects, from the top of the stack: the handler address
              * (in the slot that will hold %fs) and the error code (in the
              * orig_eax slot).  It saves the remaining registers in frame order,
              * leaves the espfix stack if necessary, then swaps things into place:
              *   %edi = handler address, %edx = error code,
              *   orig_eax slot = -1, %fs slot = the interrupted context's %fs.
              * The handler is called with %eax = register frame pointer and
              * %edx = error code; the return goes through ret_from_exception.
              */
  682        RING0_EC_FRAME
  683        pushl $do_page_fault
  684        CFI_ADJUST_CFA_OFFSET 4
  685        ALIGN
  686error_code:
  687        /* the function address is in %fs's slot on the stack */
  688        pushl %es
  689        CFI_ADJUST_CFA_OFFSET 4
  690        /*CFI_REL_OFFSET es, 0*/
  691        pushl %ds
  692        CFI_ADJUST_CFA_OFFSET 4
  693        /*CFI_REL_OFFSET ds, 0*/
  694        pushl %eax
  695        CFI_ADJUST_CFA_OFFSET 4
  696        CFI_REL_OFFSET eax, 0
  697        pushl %ebp
  698        CFI_ADJUST_CFA_OFFSET 4
  699        CFI_REL_OFFSET ebp, 0
  700        pushl %edi
  701        CFI_ADJUST_CFA_OFFSET 4
  702        CFI_REL_OFFSET edi, 0
  703        pushl %esi
  704        CFI_ADJUST_CFA_OFFSET 4
  705        CFI_REL_OFFSET esi, 0
  706        pushl %edx
  707        CFI_ADJUST_CFA_OFFSET 4
  708        CFI_REL_OFFSET edx, 0
  709        pushl %ecx
  710        CFI_ADJUST_CFA_OFFSET 4
  711        CFI_REL_OFFSET ecx, 0
  712        pushl %ebx
  713        CFI_ADJUST_CFA_OFFSET 4
  714        CFI_REL_OFFSET ebx, 0
  715        cld
             /*
              * The interrupted %fs is parked on the stack temporarily; it is
              * popped into %ecx after the possible stack switch and then stored
              * into its proper frame slot.
              */
  716        pushl %fs
  717        CFI_ADJUST_CFA_OFFSET 4
  718        /*CFI_REL_OFFSET fs, 0*/
  719        movl $(__KERNEL_PERCPU), %ecx
  720        movl %ecx, %fs
  721        UNWIND_ESPFIX_STACK
  722        popl %ecx
  723        CFI_ADJUST_CFA_OFFSET -4
  724        /*CFI_REGISTER es, ecx*/
  725        movl PT_FS(%esp), %edi          # get the function address
  726        movl PT_ORIG_EAX(%esp), %edx    # get the error code
  727        movl $-1, PT_ORIG_EAX(%esp)     # no syscall to restart
  728        mov  %ecx, PT_FS(%esp)
  729        /*CFI_REL_OFFSET fs, ES*/
  730        movl $(__USER_DS), %ecx
  731        movl %ecx, %ds
  732        movl %ecx, %es
  733        TRACE_IRQS_OFF
  734        movl %esp,%eax                  # pt_regs pointer
  735        call *%edi
  736        jmp ret_from_exception
  737        CFI_ENDPROC
  738KPROBE_END(page_fault)
 739
  740ENTRY(coprocessor_error)
             /*
              * Exception stub: starts from a three-word frame, pushes a zero error
              * code and the address of do_coprocessor_error, then joins the common
              * error_code path, which calls the handler.
              */
  741        RING0_INT_FRAME
  742        pushl $0
  743        CFI_ADJUST_CFA_OFFSET 4
  744        pushl $do_coprocessor_error
  745        CFI_ADJUST_CFA_OFFSET 4
  746        jmp error_code
  747        CFI_ENDPROC
  748END(coprocessor_error)
 749
  750ENTRY(simd_coprocessor_error)
             /*
              * Exception stub: starts from a three-word frame, pushes a zero error
              * code and the address of do_simd_coprocessor_error, then joins the
              * common error_code path, which calls the handler.
              */
  751        RING0_INT_FRAME
  752        pushl $0
  753        CFI_ADJUST_CFA_OFFSET 4
  754        pushl $do_simd_coprocessor_error
  755        CFI_ADJUST_CFA_OFFSET 4
  756        jmp error_code
  757        CFI_ENDPROC
  758END(simd_coprocessor_error)
 759
  760ENTRY(device_not_available)
             /*
              * Exception stub: starts from a three-word frame and pushes -1 (not
              * 0) into the error-code slot, then the address of
              * do_device_not_available, and joins the common error_code path.
              */
  761        RING0_INT_FRAME
  762        pushl $-1                       # mark this as an int
  763        CFI_ADJUST_CFA_OFFSET 4
  764        pushl $do_device_not_available
  765        CFI_ADJUST_CFA_OFFSET 4
  766        jmp error_code
  767        CFI_ENDPROC
  768END(device_not_available)
 769
 770/*
 771 * Debug traps and NMI can happen at the one SYSENTER instruction
 772 * that sets up the real kernel stack. Check here, since we can't
 773 * allow the wrong stack to be used.
 774 *
 775 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 776 * already pushed 3 words if it hits on the sysenter instruction:
 777 * eflags, cs and eip.
 778 *
 779 * We just load the right stack, and push the three (known) values
 780 * by hand onto the new stack - while updating the return eip past
 781 * the instruction that would have done it for sysenter.
 782 */
  783#define FIX_STACK(offset, ok, label)            \
     /* offset: bytes already pushed on the stack in use, added to the     */ \
     /*         TSS_sysenter_sp0 displacement used to find the real stack; */ \
     /* ok:     label to jump to when the saved CS at 4(%esp) is not       */ \
     /*         __KERNEL_CS, i.e. no fixup is needed;                      */ \
     /* label:  label placed on the instruction that switches stacks.      */ \
     /* After the switch, EFLAGS, __KERNEL_CS and the address of           */ \
     /* sysenter_past_esp are pushed as the new three-word return frame.   */ \
  784        cmpw $__KERNEL_CS,4(%esp);              \
  785        jne ok;                                 \
  786label:                                          \
  787        movl TSS_sysenter_sp0+offset(%esp),%esp;        \
  788        CFI_DEF_CFA esp, 0;                     \
  789        CFI_UNDEFINED eip;                      \
  790        pushfl;                                 \
  791        CFI_ADJUST_CFA_OFFSET 4;                \
  792        pushl $__KERNEL_CS;                     \
  793        CFI_ADJUST_CFA_OFFSET 4;                \
  794        pushl $sysenter_past_esp;               \
  795        CFI_ADJUST_CFA_OFFSET 4;                \
  796        CFI_REL_OFFSET eip, 0
 797
  798KPROBE_ENTRY(debug)
             /*
              * Debug exception entry.  If the saved %eip at 0(%esp) is exactly
              * ia32_sysenter_target, the trap hit before the sysenter path had
              * loaded its real stack, so FIX_STACK switches stacks first (12 =
              * the three words of this trap's own frame).  The
              * debug_esp_fix_insn label it defines is used by the NMI entry to
              * recognise this window.  Then -1 goes into the orig_eax slot and
              * do_debug is called with %eax = register frame pointer, %edx = 0.
              */
  799        RING0_INT_FRAME
  800        cmpl $ia32_sysenter_target,(%esp)
  801        jne debug_stack_correct
  802        FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
  803debug_stack_correct:
  804        pushl $-1                       # mark this as an int
  805        CFI_ADJUST_CFA_OFFSET 4
  806        SAVE_ALL
  807        TRACE_IRQS_OFF
  808        xorl %edx,%edx                  # error code 0
  809        movl %esp,%eax                  # pt_regs pointer
  810        call do_debug
  811        jmp ret_from_exception
  812        CFI_ENDPROC
  813KPROBE_END(debug)
 814
 815/*
 816 * NMI is doubly nasty. It can happen _while_ we're handling
 817 * a debug fault, and the debug fault hasn't yet been able to
  818 * clear up the stack. So we first check whether we got an
 819 * NMI on the sysenter entry path, but after that we need to
 820 * check whether we got an NMI on the debug path where the debug
 821 * fault happened on the sysenter path.
 822 */
  823KPROBE_ENTRY(nmi)
             /*
              * NMI entry.  Checks, in order:
              *   1. running on the espfix stack (%ss == __ESPFIX_SS)
              *                                      -> nmi_espfix_stack
              *   2. saved %eip == ia32_sysenter_target -> nmi_stack_fixup
              *   3. the word at 12(%esp) == ia32_sysenter_target, i.e. an
              *      earlier frame may belong to a debug trap taken on the
              *      sysenter instruction        -> nmi_debug_stack_check
              * Otherwise the stack is usable as is (nmi_stack_correct).
              * %eax is borrowed for the tests and restored with popl, which
              * leaves the flags from the preceding compare intact for the
              * conditional jump that follows.
              */
  824        RING0_INT_FRAME
  825        pushl %eax
  826        CFI_ADJUST_CFA_OFFSET 4
  827        movl %ss, %eax
  828        cmpw $__ESPFIX_SS, %ax
  829        popl %eax
  830        CFI_ADJUST_CFA_OFFSET -4
  831        je nmi_espfix_stack
  832        cmpl $ia32_sysenter_target,(%esp)
  833        je nmi_stack_fixup
  834        pushl %eax
  835        CFI_ADJUST_CFA_OFFSET 4
  836        movl %esp,%eax
  837        /* Do not access memory above the end of our stack page,
  838         * it might not exist.
  839         */
  840        andl $(THREAD_SIZE-1),%eax
  841        cmpl $(THREAD_SIZE-20),%eax
  842        popl %eax
  843        CFI_ADJUST_CFA_OFFSET -4
  844        jae nmi_stack_correct
  845        cmpl $ia32_sysenter_target,12(%esp)
  846        je nmi_debug_stack_check
  847nmi_stack_correct:
  848        /* We have a RING0_INT_FRAME here */
             /*
              * %eax is pushed only to fill the orig_eax slot.  do_nmi gets the
              * register frame pointer in %eax and 0 in %edx; the return skips the
              * work checks and irq tracing by going to restore_nocheck_notrace.
              */
  849        pushl %eax
  850        CFI_ADJUST_CFA_OFFSET 4
  851        SAVE_ALL
  852        TRACE_IRQS_OFF
  853        xorl %edx,%edx          # zero error code
  854        movl %esp,%eax          # pt_regs pointer
  855        call do_nmi
  856        jmp restore_nocheck_notrace
  857        CFI_ENDPROC
 858
  859nmi_stack_fixup:
             /*
              * NMI arrived exactly at ia32_sysenter_target: switch to the real
              * kernel stack with FIX_STACK (12 = this NMI's three-word frame) and
              * continue at nmi_stack_correct.
              */
  860        RING0_INT_FRAME
  861        FIX_STACK(12,nmi_stack_correct, 1)
  862        jmp nmi_stack_correct
  863
  864nmi_debug_stack_check:
  865        /* We have a RING0_INT_FRAME here */
             /*
              * 12(%esp) held ia32_sysenter_target, so the three words above this
              * NMI's frame look like a trap frame taken on the sysenter
              * instruction.  Treat it as such only if that frame's CS (16(%esp))
              * is __KERNEL_CS and this NMI interrupted code between the labels
              * debug and debug_esp_fix_insn (unsigned range check).  In that case
              * both frames are skipped: 24 = 2 frames * 12 bytes.
              */
  866        cmpw $__KERNEL_CS,16(%esp)
  867        jne nmi_stack_correct
  868        cmpl $debug,(%esp)
  869        jb nmi_stack_correct
  870        cmpl $debug_esp_fix_insn,(%esp)
  871        ja nmi_stack_correct
  872        FIX_STACK(24,nmi_stack_correct, 1)
  873        jmp nmi_stack_correct
 874
  875nmi_espfix_stack:
  876        /* We have a RING0_INT_FRAME here.
  877         *
  878         * create the pointer to lss back
  879         */
             /*
              * Layout built below, from high to low addresses: the original
              * three-word return frame, the saved %ss, the saved %esp (adjusted
              * by 4 so it points at the original frame again; only the low 16
              * bits are touched by addw), a copy of the return frame, the
              * orig_eax slot, and the SAVE_ALL registers.
              */
  880        pushl %ss
  881        CFI_ADJUST_CFA_OFFSET 4
  882        pushl %esp
  883        CFI_ADJUST_CFA_OFFSET 4
  884        addw $4, (%esp)
  885        /* copy the iret frame of 12 bytes */
             /* the source offset stays 16: %esp drops by 4 with every push */
  886        .rept 3
  887        pushl 16(%esp)
  888        CFI_ADJUST_CFA_OFFSET 4
  889        .endr
  890        pushl %eax
  891        CFI_ADJUST_CFA_OFFSET 4
  892        SAVE_ALL
  893        TRACE_IRQS_OFF
  894        FIXUP_ESPFIX_STACK              # %eax == %esp
  895        xorl %edx,%edx                  # zero error code
  896        call do_nmi
  897        RESTORE_REGS
             /*
              * Skip the orig_eax slot (4) and the copied return frame (12) to
              * reach the saved %esp/%ss pair and reload both at once.
              */
  898        lss 12+4(%esp), %esp            # back to espfix stack
  899        CFI_ADJUST_CFA_OFFSET -24
  900        jmp irq_return
  901        CFI_ENDPROC
  902KPROBE_END(nmi)
 903
  904#ifdef CONFIG_PARAVIRT
     /*
      * Native implementations of two operations that the comment near the top
      * of this file lists as overridable for paravirtualization:
      *   native_iret:               a bare iret, with an __ex_table entry that
      *                              sends a fault on it to iret_exc.
      *   native_irq_enable_sysexit: sti immediately followed by sysexit.
      */
  905ENTRY(native_iret)
  906        iret
  907.section __ex_table,"a"
  908        .align 4
  909        .long native_iret, iret_exc
  910.previous
  911END(native_iret)
  912
  913ENTRY(native_irq_enable_sysexit)
  914        sti
  915        sysexit
  916END(native_irq_enable_sysexit)
  917#endif
 918
 919KPROBE_ENTRY(int3)
            /*
             * Breakpoint entry.  Pushes -1 ahead of SAVE_ALL, calls do_int3 with
             * %eax = %esp and %edx = 0, and leaves through ret_from_exception.
             * NOTE(review): going by the stack layout at the top of the file the
             * -1 lands in the orig_eax slot; SAVE_ALL and do_int3 are defined
             * elsewhere, so confirm the layout and argument passing there.
             */
 920        RING0_INT_FRAME
 921        pushl $-1                       # mark this as an int
 922        CFI_ADJUST_CFA_OFFSET 4
 923        SAVE_ALL
 924        TRACE_IRQS_OFF
 925        xorl %edx,%edx          # zero error code
 926        movl %esp,%eax          # pt_regs pointer
 927        call do_int3
 928        jmp ret_from_exception
 929        CFI_ENDPROC
 930KPROBE_END(int3)
 931
 932ENTRY(overflow)
            /*
             * Stub for do_overflow: pushes a zero word, then the address of the
             * handler, and jumps to the shared error_code path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
 933        RING0_INT_FRAME
 934        pushl $0
 935        CFI_ADJUST_CFA_OFFSET 4
 936        pushl $do_overflow
 937        CFI_ADJUST_CFA_OFFSET 4
 938        jmp error_code
 939        CFI_ENDPROC
 940END(overflow)
 941
 942ENTRY(bounds)
            /*
             * Stub for do_bounds: pushes a zero word, then the address of the
             * handler, and jumps to the shared error_code path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
 943        RING0_INT_FRAME
 944        pushl $0
 945        CFI_ADJUST_CFA_OFFSET 4
 946        pushl $do_bounds
 947        CFI_ADJUST_CFA_OFFSET 4
 948        jmp error_code
 949        CFI_ENDPROC
 950END(bounds)
 951
 952ENTRY(invalid_op)
            /*
             * Stub for do_invalid_op: pushes a zero word, then the address of
             * the handler, and jumps to the shared error_code path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
 953        RING0_INT_FRAME
 954        pushl $0
 955        CFI_ADJUST_CFA_OFFSET 4
 956        pushl $do_invalid_op
 957        CFI_ADJUST_CFA_OFFSET 4
 958        jmp error_code
 959        CFI_ENDPROC
 960END(invalid_op)
 961
 962ENTRY(coprocessor_segment_overrun)
            /*
             * Stub for do_coprocessor_segment_overrun: pushes a zero word, then
             * the address of the handler, and jumps to the shared error_code
             * path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
 963        RING0_INT_FRAME
 964        pushl $0
 965        CFI_ADJUST_CFA_OFFSET 4
 966        pushl $do_coprocessor_segment_overrun
 967        CFI_ADJUST_CFA_OFFSET 4
 968        jmp error_code
 969        CFI_ENDPROC
 970END(coprocessor_segment_overrun)
 971
 972ENTRY(invalid_TSS)
            /*
             * Stub for do_invalid_TSS: pushes only the handler address before
             * jumping to error_code; there is no "pushl $0" as in the
             * RING0_INT_FRAME stubs of this file.
             * NOTE(review): presumably the CPU has already pushed an error code
             * for this exception, which RING0_EC_FRAME accounts for - confirm.
             */
 973        RING0_EC_FRAME
 974        pushl $do_invalid_TSS
 975        CFI_ADJUST_CFA_OFFSET 4
 976        jmp error_code
 977        CFI_ENDPROC
 978END(invalid_TSS)
 979
 980ENTRY(segment_not_present)
            /*
             * Stub for do_segment_not_present: pushes only the handler address
             * before jumping to error_code; there is no "pushl $0" as in the
             * RING0_INT_FRAME stubs of this file.
             * NOTE(review): presumably the CPU has already pushed an error code
             * for this exception, which RING0_EC_FRAME accounts for - confirm.
             */
 981        RING0_EC_FRAME
 982        pushl $do_segment_not_present
 983        CFI_ADJUST_CFA_OFFSET 4
 984        jmp error_code
 985        CFI_ENDPROC
 986END(segment_not_present)
 987
 988ENTRY(stack_segment)
            /*
             * Stub for do_stack_segment: pushes only the handler address before
             * jumping to error_code; there is no "pushl $0" as in the
             * RING0_INT_FRAME stubs of this file.
             * NOTE(review): presumably the CPU has already pushed an error code
             * for this exception, which RING0_EC_FRAME accounts for - confirm.
             */
 989        RING0_EC_FRAME
 990        pushl $do_stack_segment
 991        CFI_ADJUST_CFA_OFFSET 4
 992        jmp error_code
 993        CFI_ENDPROC
 994END(stack_segment)
 995
 996KPROBE_ENTRY(general_protection)
            /*
             * Stub for do_general_protection: pushes only the handler address
             * before jumping to error_code; there is no "pushl $0" as in the
             * RING0_INT_FRAME stubs of this file.
             * NOTE(review): presumably the CPU has already pushed an error code
             * for this exception, which RING0_EC_FRAME accounts for - confirm.
             */
 997        RING0_EC_FRAME
 998        pushl $do_general_protection
 999        CFI_ADJUST_CFA_OFFSET 4
1000        jmp error_code
1001        CFI_ENDPROC
1002KPROBE_END(general_protection)
1003
1004ENTRY(alignment_check)
            /*
             * Stub for do_alignment_check: pushes only the handler address
             * before jumping to error_code; there is no "pushl $0" as in the
             * RING0_INT_FRAME stubs of this file.
             * NOTE(review): presumably the CPU has already pushed an error code
             * for this exception, which RING0_EC_FRAME accounts for - confirm.
             */
1005        RING0_EC_FRAME
1006        pushl $do_alignment_check
1007        CFI_ADJUST_CFA_OFFSET 4
1008        jmp error_code
1009        CFI_ENDPROC
1010END(alignment_check)
1011
1012ENTRY(divide_error)
            /*
             * Stub for do_divide_error: pushes a zero word in place of an error
             * code, then the address of the handler, and jumps to the shared
             * error_code path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
1013        RING0_INT_FRAME
1014        pushl $0                        # no error code
1015        CFI_ADJUST_CFA_OFFSET 4
1016        pushl $do_divide_error
1017        CFI_ADJUST_CFA_OFFSET 4
1018        jmp error_code
1019        CFI_ENDPROC
1020END(divide_error)
1021
1022#ifdef CONFIG_X86_MCE
1023ENTRY(machine_check)
            /*
             * Machine-check stub, built only under CONFIG_X86_MCE.  Pushes a
             * zero word and a handler pointer, then jumps to error_code.
             */
1024        RING0_INT_FRAME
1025        pushl $0
1026        CFI_ADJUST_CFA_OFFSET 4
            /*
             * No '$' here: the operand is a memory reference, so the word
             * stored in machine_check_vector is pushed, not the symbol address.
             * NOTE(review): presumably that variable holds the current handler
             * pointer so it can be replaced at run time - confirm at its
             * definition.
             */
1027        pushl machine_check_vector
1028        CFI_ADJUST_CFA_OFFSET 4
1029        jmp error_code
1030        CFI_ENDPROC
1031END(machine_check)
1032#endif
1033
1034ENTRY(spurious_interrupt_bug)
            /*
             * Stub for do_spurious_interrupt_bug: pushes a zero word, then the
             * address of the handler, and jumps to the shared error_code path.
             * NOTE(review): error_code is outside this chunk; presumably it
             * takes the two pushed words as error code and handler to call.
             */
1035        RING0_INT_FRAME
1036        pushl $0
1037        CFI_ADJUST_CFA_OFFSET 4
1038        pushl $do_spurious_interrupt_bug
1039        CFI_ADJUST_CFA_OFFSET 4
1040        jmp error_code
1041        CFI_ENDPROC
1042END(spurious_interrupt_bug)
1043
1044ENTRY(kernel_thread_helper)
            /*
             * Calls the function whose address is in %ebx, handing it the value
             * from %edx both in %eax and as a word on the stack, then hands the
             * result to do_exit the same way (still in %eax, and pushed).
             * NOTE(review): presumably %ebx and %edx are preloaded by whoever
             * builds the new thread's register state, and do_exit does not
             * return (no instruction follows the call) - confirm.
             */
1045        pushl $0                # fake return address for unwinder
1046        CFI_STARTPROC
1047        movl %edx,%eax
1048        push %edx
1049        CFI_ADJUST_CFA_OFFSET 4
1050        call *%ebx
            /* %eax now holds whatever the called function returned. */
1051        push %eax
1052        CFI_ADJUST_CFA_OFFSET 4
1053        call do_exit
1054        CFI_ENDPROC
1055ENDPROC(kernel_thread_helper)
1056
1057#ifdef CONFIG_XEN
1058/* Xen doesn't set %esp to be precisely what the normal sysenter
1059   entrypoint expects, so fix it up before using the normal path. */
1060ENTRY(xen_sysenter_target)
            /*
             * Xen sysenter entry: drops the five words at the top of the stack
             * and continues on the normal path at sysenter_past_esp (defined
             * outside this chunk).  The CFA offset is lowered by the same 5*4
             * bytes so the unwind annotation follows the stack adjustment.
             */
1061        RING0_INT_FRAME
1062        addl $5*4, %esp         /* remove xen-provided frame */
1063        CFI_ADJUST_CFA_OFFSET -5*4
1064        jmp sysenter_past_esp
1065        CFI_ENDPROC
            /*
             * Close the symbol like the other entry points in this file, so it
             * gets .type/.size information; ENDPROC emits no instructions.
             */
    ENDPROC(xen_sysenter_target)
1066
1067ENTRY(xen_hypervisor_callback)
            /*
             * Xen event upcall entry.  Builds a register frame (zero word plus
             * SAVE_ALL), then either diverts to xen_iret_crit_fixup when the
             * saved EIP lies inside [xen_iret_start_crit, xen_iret_end_crit),
             * or falls into xen_do_upcall to deliver the event.
             * NOTE(review): going by the stack layout at the top of the file the
             * pushed zero lands in the orig_eax slot - confirm against SAVE_ALL.
             */
1068        CFI_STARTPROC
1069        pushl $0
1070        CFI_ADJUST_CFA_OFFSET 4
1071        SAVE_ALL
1072        TRACE_IRQS_OFF
1073
1074        /* Check to see if we got the event in the critical
1075           region in xen_iret_direct, after we've reenabled
1076           events and checked for pending events.  This simulates
1077           iret instruction's behaviour where it delivers a
1078           pending interrupt when enabling interrupts. */
1079        movl PT_EIP(%esp),%eax
            /* Unsigned range test: below the start or at/after the end means
               the interrupted code was outside the critical region. */
1080        cmpl $xen_iret_start_crit,%eax
1081        jb   1f
1082        cmpl $xen_iret_end_crit,%eax
1083        jae  1f
1084
1085        jmp  xen_iret_crit_fixup
1086
            /* Common delivery path: pass the current stack pointer in %eax to
               xen_evtchn_do_upcall, then leave through ret_from_intr. */
1087ENTRY(xen_do_upcall)
10881:      mov %esp, %eax
1089        call xen_evtchn_do_upcall
1090        jmp  ret_from_intr
1091        CFI_ENDPROC
1092ENDPROC(xen_hypervisor_callback)
1093
1094# Hypervisor uses this for application faults while it executes.
1095# We get here for two reasons:
1096#  1. Fault while reloading DS, ES, FS or GS
1097#  2. Fault while executing IRET
1098# Category 1 we fix up by reattempting the load, and zeroing the segment
1099# register if the load fails.
1100# Category 2 we fix up by jumping to do_iret_error. We cannot use the
1101# normal Linux return path in this case because if we use the IRET hypercall
1102# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1103# We distinguish between categories by maintaining a status value in EAX.
1104ENTRY(xen_failsafe_callback)
1105        CFI_STARTPROC
            /* Save %eax and reuse it as the status flag: it starts at 1 and is
               cleared by any of the fixups below. */
1106        pushl %eax
1107        CFI_ADJUST_CFA_OFFSET 4
1108        movl $1,%eax
            /* Reload the four segment registers from the words that sit just
               above the saved %eax.  Each load has an __ex_table entry, so a
               faulting load is redirected to its fixup (6-9). */
11091:      mov 4(%esp),%ds
11102:      mov 8(%esp),%es
11113:      mov 12(%esp),%fs
11124:      mov 16(%esp),%gs
            /* ZF is set here from the status flag.  popl and lea do not modify
               EFLAGS, so the jz further down still sees this result. */
1113        testl %eax,%eax
1114        popl %eax
1115        CFI_ADJUST_CFA_OFFSET -4
            /* Step over the four selector words without touching the flags. */
1116        lea 16(%esp),%esp
1117        CFI_ADJUST_CFA_OFFSET -16
1118        jz 5f
1119        addl $16,%esp
1120        jmp iret_exc            # EAX != 0 => Category 2 (Bad IRET)
11215:      pushl $0                # EAX == 0 => Category 1 (Bad segment)
1122        CFI_ADJUST_CFA_OFFSET 4
1123        SAVE_ALL
1124        jmp ret_from_exception
1125        CFI_ENDPROC
1126
            /* Fixups for the loads at 1-4: each clears %eax (the status flag),
               stores that zero over the selector word that faulted, and jumps
               back to retry the same load with the zeroed value. */
1127.section .fixup,"ax"
11286:      xorl %eax,%eax
1129        movl %eax,4(%esp)
1130        jmp 1b
11317:      xorl %eax,%eax
1132        movl %eax,8(%esp)
1133        jmp 2b
11348:      xorl %eax,%eax
1135        movl %eax,12(%esp)
1136        jmp 3b
11379:      xorl %eax,%eax
1138        movl %eax,16(%esp)
1139        jmp 4b
1140.previous
            /* Exception table: (address of load, address of its fixup). */
1141.section __ex_table,"a"
1142        .align 4
1143        .long 1b,6b
1144        .long 2b,7b
1145        .long 3b,8b
1146        .long 4b,9b
1147.previous
1148ENDPROC(xen_failsafe_callback)
1149
1150#endif  /* CONFIG_XEN */
1151
1152#ifdef CONFIG_FUNCTION_TRACER
1153#ifdef CONFIG_DYNAMIC_FTRACE
1154
1155ENTRY(mcount)
            /*
             * mcount for the CONFIG_DYNAMIC_FTRACE build: returns immediately.
             * NOTE(review): presumably the real work is done in ftrace_caller
             * once call sites have been redirected there - confirm.
             */
1156        ret
1157END(mcount)
1158
1159ENTRY(ftrace_caller)
            /*
             * Tracing trampoline for the dynamic-ftrace build.  Saves %eax, %ecx
             * and %edx, loads two values into %eax/%edx, calls through the
             * ftrace_call site, restores the three registers and returns via
             * the ret at ftrace_stub.
             */
1160        pushl %eax
1161        pushl %ecx
1162        pushl %edx
            /* 0xc(%esp) is the word just above the three saved registers, i.e.
               what was on top of the stack on entry (our return address). */
1163        movl 0xc(%esp), %eax
            /* NOTE(review): presumably the traced function's own return address,
               which assumes %ebp is a valid frame pointer here - confirm. */
1164        movl 0x4(%ebp), %edx
            /* NOTE(review): presumably moves %eax back from the return address
               to the start of the instruction that called us. */
1165        subl $MCOUNT_INSN_SIZE, %eax
1166
            /* NOTE(review): global label on the call itself; presumably the
               dynamic ftrace code rewrites this call target at run time.  As
               assembled, it calls ftrace_stub, which just returns. */
1167.globl ftrace_call
1168ftrace_call:
1169        call ftrace_stub
1170
1171        popl %edx
1172        popl %ecx
1173        popl %eax
1174
1175.globl ftrace_stub
1176ftrace_stub:
1177        ret
1178END(ftrace_caller)
1179
1180#else /* ! CONFIG_DYNAMIC_FTRACE */
1181
1182ENTRY(mcount)
            /*
             * mcount for the build without CONFIG_DYNAMIC_FTRACE.  If the
             * pointer stored in ftrace_trace_function is the address of
             * ftrace_stub, return at once; otherwise go to "trace" and call it.
             */
1183        cmpl $ftrace_stub, ftrace_trace_function
1184        jnz trace
1185.globl ftrace_stub
1186ftrace_stub:
1187        ret
1188
1189        /* taken from glibc */
1190trace:
1191        pushl %eax
1192        pushl %ecx
1193        pushl %edx
            /* 0xc(%esp) is the word just above the three saved registers, i.e.
               what was on top of the stack on entry (our return address). */
1194        movl 0xc(%esp), %eax
            /* NOTE(review): presumably the traced function's own return address,
               which assumes %ebp is a valid frame pointer here - confirm. */
1195        movl 0x4(%ebp), %edx
            /* NOTE(review): presumably moves %eax back from the return address
               to the start of the instruction that called us. */
1196        subl $MCOUNT_INSN_SIZE, %eax
1197
            /* Indirect call through the pointer stored in the variable. */
1198        call *ftrace_trace_function
1199
1200        popl %edx
1201        popl %ecx
1202        popl %eax
1203
            /* Reuse the ret at ftrace_stub to return to the caller. */
1204        jmp ftrace_stub
1205END(mcount)
1206#endif /* CONFIG_DYNAMIC_FTRACE */
1207#endif /* CONFIG_FUNCTION_TRACER */
1208
1209.section .rodata,"a"
            /*
             * Emit the included table into .rodata.  "." below is the location
             * counter right after the include, so syscall_table_size is the
             * byte distance from sys_call_table to the end of what was included.
             * NOTE(review): sys_call_table is presumably the label defined at
             * the start of syscall_table_32.S - confirm there.
             */
1210#include "syscall_table_32.S"
1211
1212syscall_table_size=(.-sys_call_table)
1213
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.