linux/arch/x86/kernel/entry_64.S
<<
>>
Prefs
   1/*
   2 *  linux/arch/x86_64/entry.S
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
   6 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
   7 */
   8
   9/*
  10 * entry.S contains the system-call and fault low-level handling routines.
  11 *
  12 * NOTE: This code handles signal-recognition, which happens every time
  13 * after an interrupt and after each system call.
  14 * 
  15 * Normal syscalls and interrupts don't save a full stack frame, this is 
  16 * only done for syscall tracing, signals or fork/exec et.al.
  17 * 
  18 * A note on terminology:        
  19 * - top of stack: Architecture defined interrupt frame from SS to RIP 
  20 * at the top of the kernel process stack.      
   21 * - partial stack frame: partially saved registers up to R11.
   22 * - full stack frame: Like partial stack frame, but all registers saved.
  23 *
  24 * Some macro usage:
  25 * - CFI macros are used to generate dwarf2 unwind information for better
  26 * backtraces. They don't change any code.
  27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
   29 * There are unfortunately lots of special cases where some registers
   30 * are not touched. The macro is a big mess that should be cleaned up.
  31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  32 * Gives a full stack frame.
  33 * - ENTRY/END Define functions in the symbol table.
  34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  35 * frame that is otherwise undefined after a SYSCALL
  36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  38 */
  39
  40#include <linux/linkage.h>
  41#include <asm/segment.h>
  42#include <asm/cache.h>
  43#include <asm/errno.h>
  44#include <asm/dwarf2.h>
  45#include <asm/calling.h>
  46#include <asm/asm-offsets.h>
  47#include <asm/msr.h>
  48#include <asm/unistd.h>
  49#include <asm/thread_info.h>
  50#include <asm/hw_irq.h>
  51#include <asm/page.h>
  52#include <asm/irqflags.h>
  53#include <asm/paravirt.h>
  54#include <asm/ftrace.h>
  55
   56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
   57#include <linux/elf-em.h>
     /*
      * Audit architecture token for x86-64; this file passes it as the first
      * argument to audit_syscall_entry. The two __AUDIT_ARCH_* flags are
      * defined after the macro that names them, which is harmless: the
      * preprocessor only expands them where AUDIT_ARCH_X86_64 is used.
      */
   58#define AUDIT_ARCH_X86_64       (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
   59#define __AUDIT_ARCH_64BIT 0x80000000
   60#define __AUDIT_ARCH_LE    0x40000000
  61
  62        .code64
  63
  64#ifdef CONFIG_FUNCTION_TRACER
  65#ifdef CONFIG_DYNAMIC_FTRACE
   66ENTRY(mcount)
     /*
      * CONFIG_DYNAMIC_FTRACE build: mcount is only a bare return. The
      * register-saving tracer entry in this configuration is ftrace_caller.
      */
   67        retq
   68END(mcount)
  69
   70ENTRY(ftrace_caller)
   71
     /*
      * Saves %rax, %rcx, %rdx, %rsi, %rdi, %r8 and %r9 in a 0x38-byte stack
      * area, calls through the ftrace_call site, then restores them and
      * returns through ftrace_stub.
      * Arguments set up for the call:
      *   %rdi = return address found above the save area, minus
      *          MCOUNT_INSN_SIZE
      *   %rsi = the quadword at 8(%rbp)
      */
   72        /* taken from glibc */
   73        subq $0x38, %rsp                /* 7 save slots of 8 bytes */
   74        movq %rax, (%rsp)
   75        movq %rcx, 8(%rsp)
   76        movq %rdx, 16(%rsp)
   77        movq %rsi, 24(%rsp)
   78        movq %rdi, 32(%rsp)
   79        movq %r8, 40(%rsp)
   80        movq %r9, 48(%rsp)
   81
   82        movq 0x38(%rsp), %rdi           /* slot just above the save area: our return address */
   83        movq 8(%rbp), %rsi              /* NOTE(review): presumably the caller's own return address (needs frame pointers) - confirm */
   84        subq $MCOUNT_INSN_SIZE, %rdi
   85
   86.globl ftrace_call
   87ftrace_call:
   88        call ftrace_stub                /* NOTE(review): presumably rewritten at run time to call the active tracer - confirm */
   89
   90        movq 48(%rsp), %r9
   91        movq 40(%rsp), %r8
   92        movq 32(%rsp), %rdi
   93        movq 24(%rsp), %rsi
   94        movq 16(%rsp), %rdx
   95        movq 8(%rsp), %rcx
   96        movq (%rsp), %rax
   97        addq $0x38, %rsp                /* release the save area */
   98
   99.globl ftrace_stub
  100ftrace_stub:
  101        retq                            /* shared return; also the default call target above */
  102END(ftrace_caller)
 103
 104#else /* ! CONFIG_DYNAMIC_FTRACE */
  105ENTRY(mcount)
     /*
      * Static-ftrace build: return at once while ftrace_trace_function still
      * equals ftrace_stub. Otherwise save %rax, %rcx, %rdx, %rsi, %rdi, %r8
      * and %r9, call through ftrace_trace_function, restore, and return
      * via ftrace_stub.
      */
  106        cmpq $ftrace_stub, ftrace_trace_function
  107        jnz trace                       /* a tracer other than the stub is installed */
  108.globl ftrace_stub
  109ftrace_stub:
  110        retq
  111
  112trace:
  113        /* taken from glibc */
  114        subq $0x38, %rsp                /* 7 save slots of 8 bytes */
  115        movq %rax, (%rsp)
  116        movq %rcx, 8(%rsp)
  117        movq %rdx, 16(%rsp)
  118        movq %rsi, 24(%rsp)
  119        movq %rdi, 32(%rsp)
  120        movq %r8, 40(%rsp)
  121        movq %r9, 48(%rsp)
  122
  123        movq 0x38(%rsp), %rdi           /* slot just above the save area: our return address */
  124        movq 8(%rbp), %rsi              /* NOTE(review): presumably the caller's own return address (needs frame pointers) - confirm */
  125        subq $MCOUNT_INSN_SIZE, %rdi
  126
  127        call   *ftrace_trace_function   /* indirect call through the function pointer */
  128
  129        movq 48(%rsp), %r9
  130        movq 40(%rsp), %r8
  131        movq 32(%rsp), %rdi
  132        movq 24(%rsp), %rsi
  133        movq 16(%rsp), %rdx
  134        movq 8(%rsp), %rcx
  135        movq (%rsp), %rax
  136        addq $0x38, %rsp                /* release the save area */
  137
  138        jmp ftrace_stub                 /* return through the shared retq */
  139END(mcount)
 140#endif /* CONFIG_DYNAMIC_FTRACE */
 141#endif /* CONFIG_FUNCTION_TRACER */
 142
  143#ifndef CONFIG_PREEMPT
     /*
      * The retint_kernel routine in this file is only assembled under
      * CONFIG_PREEMPT. Without it, the name aliases retint_restore_args, so
      * interrupts that hit kernel mode go straight to the register restore.
      */
  144#define retint_kernel retint_restore_args
  145#endif  
 146
  147#ifdef CONFIG_PARAVIRT
  148ENTRY(native_usergs_sysret64)
     /*
      * Executes swapgs immediately followed by sysretq; never returns to its
      * caller.
      * NOTE(review): presumably the bare-metal implementation behind
      * USERGS_SYSRET64 when paravirt is enabled - confirm in asm/paravirt.h.
      */
  149        swapgs
  150        sysretq
  151#endif /* CONFIG_PARAVIRT */
 152
 153
  154.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
     /*
      * With CONFIG_TRACE_IRQFLAGS: emit TRACE_IRQS_ON only if bit 9 of the
      * saved EFLAGS image is set, i.e. the frame being returned to had
      * interrupts enabled. "offset" is subtracted from the EFLAGS slot
      * offset to match where %rsp points. Expands to nothing otherwise.
      */
  155#ifdef CONFIG_TRACE_IRQFLAGS
  156        bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
  157        jnc  1f
  158        TRACE_IRQS_ON
  1591:
  160#endif
  161.endm
 162
  163/*
  164 * C code is not supposed to know about undefined top of stack. Every time
  165 * a C function with a pt_regs argument is called from the SYSCALL based
  166 * fast path FIXUP_TOP_OF_STACK is needed.
  167 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  168 * manipulation.
  169 */
  170
  171        /* %rsp:at FRAMEEND */
  172        .macro FIXUP_TOP_OF_STACK tmp
     /*
      * Fills in the frame slots SYSCALL leaves undefined:
      *   RSP    <- %gs:pda_oldrsp
      *   SS, CS <- __USER_DS, __USER_CS
      *   RCX    <- -1
      *   EFLAGS <- copy of the R11 slot
      * "tmp" names a scratch register, which is clobbered.
      */
  173        movq    %gs:pda_oldrsp,\tmp
  174        movq    \tmp,RSP(%rsp)
  175        movq    $__USER_DS,SS(%rsp)
  176        movq    $__USER_CS,CS(%rsp)
  177        movq    $-1,RCX(%rsp)
  178        movq    R11(%rsp),\tmp  /* get eflags */
  179        movq    \tmp,EFLAGS(%rsp)
  180        .endm
 181
  182        .macro RESTORE_TOP_OF_STACK tmp,offset=0
     /*
      * Inverse of FIXUP_TOP_OF_STACK for the state the SYSRET path reads:
      * copies the RSP slot back to %gs:pda_oldrsp and the EFLAGS slot into
      * the R11 slot. "offset" is subtracted from the slot offsets (for
      * callers whose %rsp is not at the frame base); "tmp" is clobbered.
      */
  183        movq   RSP-\offset(%rsp),\tmp
  184        movq   \tmp,%gs:pda_oldrsp
  185        movq   EFLAGS-\offset(%rsp),\tmp
  186        movq   \tmp,R11-\offset(%rsp)
  187        .endm
 188
  189        .macro FAKE_STACK_FRAME child_rip
     /*
      * Pushes six quadwords that mimic an interrupt frame plus an orig_rax
      * slot: ss = __KERNEL_DS, rsp = 0, eflags = bit 9 only, cs = __KERNEL_CS,
      * rip = child_rip, orig_rax = 0. Clobbers %rax (zeroed). Each push is
      * mirrored in the CFI state. Undone by UNFAKE_STACK_FRAME.
      */
  190        /* push in order ss, rsp, eflags, cs, rip */
  191        xorl %eax, %eax
  192        pushq $__KERNEL_DS /* ss */
  193        CFI_ADJUST_CFA_OFFSET   8
  194        /*CFI_REL_OFFSET        ss,0*/
  195        pushq %rax /* rsp */
  196        CFI_ADJUST_CFA_OFFSET   8
  197        CFI_REL_OFFSET  rsp,0
  198        pushq $(1<<9) /* eflags - interrupts on */
  199        CFI_ADJUST_CFA_OFFSET   8
  200        /*CFI_REL_OFFSET        rflags,0*/
  201        pushq $__KERNEL_CS /* cs */
  202        CFI_ADJUST_CFA_OFFSET   8
  203        /*CFI_REL_OFFSET        cs,0*/
  204        pushq \child_rip /* rip */
  205        CFI_ADJUST_CFA_OFFSET   8
  206        CFI_REL_OFFSET  rip,0
  207        pushq   %rax /* orig rax */
  208        CFI_ADJUST_CFA_OFFSET   8
  209        .endm
 210
  211        .macro UNFAKE_STACK_FRAME
     /* Drops the six quadwords pushed by FAKE_STACK_FRAME and rewinds the CFA offset to match. */
  212        addq $8*6, %rsp
  213        CFI_ADJUST_CFA_OFFSET   -(6*8)
  214        .endm
 215
  216        .macro  CFI_DEFAULT_STACK start=1
     /*
      * Emits unwind annotations for a full register frame based at %rsp.
      * start=1 (default): opens a new "simple" signal-frame procedure and
      * defines the CFA as %rsp+SS+8. start=0: only resets the CFA offset of
      * the procedure already open. In both cases every general register,
      * rip and rsp is then declared saved at its frame slot. Generates no
      * instructions.
      */
  217        .if \start
  218        CFI_STARTPROC   simple
  219        CFI_SIGNAL_FRAME
  220        CFI_DEF_CFA     rsp,SS+8
  221        .else
  222        CFI_DEF_CFA_OFFSET SS+8
  223        .endif
  224        CFI_REL_OFFSET  r15,R15
  225        CFI_REL_OFFSET  r14,R14
  226        CFI_REL_OFFSET  r13,R13
  227        CFI_REL_OFFSET  r12,R12
  228        CFI_REL_OFFSET  rbp,RBP
  229        CFI_REL_OFFSET  rbx,RBX
  230        CFI_REL_OFFSET  r11,R11
  231        CFI_REL_OFFSET  r10,R10
  232        CFI_REL_OFFSET  r9,R9
  233        CFI_REL_OFFSET  r8,R8
  234        CFI_REL_OFFSET  rax,RAX
  235        CFI_REL_OFFSET  rcx,RCX
  236        CFI_REL_OFFSET  rdx,RDX
  237        CFI_REL_OFFSET  rsi,RSI
  238        CFI_REL_OFFSET  rdi,RDI
  239        CFI_REL_OFFSET  rip,RIP
  240        /*CFI_REL_OFFSET        cs,CS*/
  241        /*CFI_REL_OFFSET        rflags,EFLAGS*/
  242        CFI_REL_OFFSET  rsp,RSP
  243        /*CFI_REL_OFFSET        ss,SS*/
  244        .endm
  245/*
  246 * A newly forked process directly context switches into this.
  247 *
  248 * Resets EFLAGS from kernel_eflags, calls schedule_tail, optionally reports
      * syscall exit to the tracer, then leaves through one of the two syscall
      * return paths:
      *  - int_ret_from_sys_call (IRET) when the saved CS has RPL 0 or the
      *    thread has _TIF_IA32 set;
      *  - ret_from_sys_call (SYSRET fast path) otherwise, after syncing the
      *    SYSRET state with RESTORE_TOP_OF_STACK.
      */
     /* rdi: prev */
  249ENTRY(ret_from_fork)
  250        CFI_DEFAULT_STACK
  251        push kernel_eflags(%rip)
  252        CFI_ADJUST_CFA_OFFSET 8
  253        popf                            # reset kernel eflags
  254        CFI_ADJUST_CFA_OFFSET -8
  255        call schedule_tail
  256        GET_THREAD_INFO(%rcx)
  257        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  258        jnz rff_trace
  259rff_action:     
  260        RESTORE_REST
  261        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
  262        je   int_ret_from_sys_call
  263        testl $_TIF_IA32,TI_flags(%rcx)
  264        jnz  int_ret_from_sys_call
  265        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  266        jmp ret_from_sys_call
  267rff_trace:
  268        movq %rsp,%rdi                  /* arg1: %rsp, the frame base at this point */
  269        call syscall_trace_leave
  270        GET_THREAD_INFO(%rcx)           /* reload: %rcx is call-clobbered and rff_action reads TI_flags through it */
  271        jmp rff_action
  272        CFI_ENDPROC
  273END(ret_from_fork)
 274
 275/*
  276 * System call entry. Up to 6 arguments in registers are supported.
 277 *
 278 * SYSCALL does not save anything on the stack and does not change the
 279 * stack pointer.
 280 */
 281                
 282/*
 283 * Register setup:      
 284 * rax  system call number
 285 * rdi  arg0
 286 * rcx  return address for syscall/sysret, C arg3 
 287 * rsi  arg1
 288 * rdx  arg2    
 289 * r10  arg3    (--> moved to rcx for C)
 290 * r8   arg4
 291 * r9   arg5
 292 * r11  eflags for syscall/sysret, temporary for C
 293 * r12-r15,rbp,rbx saved by C code, not touched.                
 294 * 
 295 * Interrupts are off on entry.
 296 * Only called from user space.
 297 *
 298 * XXX  if we had a free scratch register we could save the RSP into the stack frame
 299 *      and report it properly in ps. Unfortunately we haven't.
 300 *
 301 * When user can change the frames always force IRET. That is because
 302 * it deals with uncanonical addresses better. SYSRET has trouble
 303 * with them due to bugs in both AMD and Intel CPUs.
 304 */                                     
 305
  306ENTRY(system_call)
     /*
      * SYSCALL entry and fast path: switch to the kernel GS and stack, save
      * the argument registers, divert to tracesys if any entry work flag is
      * set, otherwise bounds-check the syscall number and dispatch through
      * sys_call_table. Falls through into ret_from_sys_call.
      */
  307        CFI_STARTPROC   simple
  308        CFI_SIGNAL_FRAME
  309        CFI_DEF_CFA     rsp,PDA_STACKOFFSET
  310        CFI_REGISTER    rip,rcx
  311        /*CFI_REGISTER  rflags,r11*/
  312        SWAPGS_UNSAFE_STACK
  313        /*
  314         * A hypervisor implementation might want to use a label
  315         * after the swapgs, so that it can do the swapgs
  316         * for the guest and jump here on syscall.
  317         */
  318ENTRY(system_call_after_swapgs)
  319
  320        movq    %rsp,%gs:pda_oldrsp     /* park the user stack pointer in the PDA */
  321        movq    %gs:pda_kernelstack,%rsp        /* switch to the kernel stack */
  322        /*
  323         * No need to follow this irqs off/on section - it's straight
  324         * and short:
  325         */
  326        ENABLE_INTERRUPTS(CLBR_NONE)
  327        SAVE_ARGS 8,1
  328        movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)     /* record the syscall number */
  329        movq  %rcx,RIP-ARGOFFSET(%rsp)          /* %rcx holds the user return address on entry */
  330        CFI_REL_OFFSET rip,RIP-ARGOFFSET
  331        GET_THREAD_INFO(%rcx)
  332        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  333        jnz tracesys
  334system_call_fastpath:
  335        cmpq $__NR_syscall_max,%rax
  336        ja badsys                       /* unsigned compare: number out of table range */
  337        movq %r10,%rcx                  /* arg3 arrives in %r10; C expects it in %rcx */
  338        call *sys_call_table(,%rax,8)  # XXX:    rip relative
  339        movq %rax,RAX-ARGOFFSET(%rsp)   /* store the return value in the frame */
  340/*
  341 * Syscall return path ending with SYSRET (fast path)
  342 * Has incomplete stack frame and undefined top of stack.
  343 *
      * sysret_check is re-entered after each piece of pending work has been
      * handled; %edi carries the mask of TI_flags bits that still force the
      * slow path on this pass.
      */
  344ret_from_sys_call:
  345        movl $_TIF_ALLWORK_MASK,%edi
  346        /* edi: flagmask */
  347sysret_check:           
  348        LOCKDEP_SYS_EXIT
  349        GET_THREAD_INFO(%rcx)
  350        DISABLE_INTERRUPTS(CLBR_NONE)
  351        TRACE_IRQS_OFF
  352        movl TI_flags(%rcx),%edx
  353        andl %edi,%edx                  /* edx = pending work selected by the mask */
  354        jnz  sysret_careful 
  355        CFI_REMEMBER_STATE
  356        /*
  357         * sysretq will re-enable interrupts:
  358         */
  359        TRACE_IRQS_ON
  360        movq RIP-ARGOFFSET(%rsp),%rcx   /* return address goes back into %rcx for sysret */
  361        CFI_REGISTER    rip,rcx
  362        RESTORE_ARGS 0,-ARG_SKIP,1
  363        /*CFI_REGISTER  rflags,r11*/
  364        movq    %gs:pda_oldrsp, %rsp    /* back onto the user stack saved at entry */
  365        USERGS_SYSRET64
 366
  367        CFI_RESTORE_STATE
  368        /* Handle reschedules */
  369        /* edx: work, edi: workmask */
     /*
      * If TIF_NEED_RESCHED is among the pending work, call schedule() with
      * interrupts enabled and re-run sysret_check; anything else is handed
      * to sysret_signal. %rdi (the work mask) is preserved across the call
      * by push/pop.
      */
  370sysret_careful:
  371        bt $TIF_NEED_RESCHED,%edx
  372        jnc sysret_signal
  373        TRACE_IRQS_ON
  374        ENABLE_INTERRUPTS(CLBR_NONE)
  375        pushq %rdi
  376        CFI_ADJUST_CFA_OFFSET 8
  377        call schedule
  378        popq  %rdi
  379        CFI_ADJUST_CFA_OFFSET -8
  380        jmp sysret_check
 381
  382        /*
      * Handle a signal (or other non-reschedule work). With
      * CONFIG_AUDITSYSCALL a pending TIF_SYSCALL_AUDIT is diverted to
      * sysret_audit first. Otherwise do_notify_resume is invoked through
      * ptregscall_common and the return continues on the IRET path
      * (int_with_check) with the narrower _TIF_WORK_MASK.
      */
  383sysret_signal:
  384        TRACE_IRQS_ON
  385        ENABLE_INTERRUPTS(CLBR_NONE)
  386#ifdef CONFIG_AUDITSYSCALL
  387        bt $TIF_SYSCALL_AUDIT,%edx
  388        jc sysret_audit
  389#endif
  390        /* edx: work flags (arg3) */
  391        leaq do_notify_resume(%rip),%rax        /* ptregscall_common calls through %rax */
  392        leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  393        xorl %esi,%esi # oldset -> arg2
  394        call ptregscall_common
  395        movl $_TIF_WORK_MASK,%edi
  396        /* Use IRET because user could have changed frame. This
  397           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  398        DISABLE_INTERRUPTS(CLBR_NONE)
  399        TRACE_IRQS_OFF
  400        jmp int_with_check
  401        
  402badsys:
  403        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  404        jmp ret_from_sys_call
 401        
 402badsys:
 403        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 404        jmp ret_from_sys_call
 405
 406#ifdef CONFIG_AUDITSYSCALL
  407        /*
  408         * Fast path for syscall audit without full syscall trace.
  409         * We just call audit_syscall_entry() directly, and then
  410         * jump back to the normal fast path.
  411         *
          * The argument moves run from the sixth C argument down to the
          * first, so each source register is read before it is overwritten
          * (e.g. %rdx is copied to %r8 before %rdi is copied into %rdx).
          * Do not reorder them.
          */
  412auditsys:
  413        movq %r10,%r9                   /* 6th arg: 4th syscall arg */
  414        movq %rdx,%r8                   /* 5th arg: 3rd syscall arg */
  415        movq %rsi,%rcx                  /* 4th arg: 2nd syscall arg */
  416        movq %rdi,%rdx                  /* 3rd arg: 1st syscall arg */
  417        movq %rax,%rsi                  /* 2nd arg: syscall number */
  418        movl $AUDIT_ARCH_X86_64,%edi    /* 1st arg: audit arch */
  419        call audit_syscall_entry
  420        LOAD_ARGS 0             /* reload call-clobbered registers */
  421        jmp system_call_fastpath
 422
 423        /*
 424         * Return fast path for syscall audit.  Call audit_syscall_exit()
 425         * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 426         * masked off.
 427         */
 428sysret_audit:
 429        movq %rax,%rsi          /* second arg, syscall return value */
 430        cmpq $0,%rax            /* is it < 0? */
 431        setl %al                /* 1 if so, 0 if not */
 432        movzbl %al,%edi         /* zero-extend that into %edi */
 433        inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 434        call audit_syscall_exit
 435        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 436        jmp sysret_check
 437#endif  /* CONFIG_AUDITSYSCALL */
 438
  439        /*
      * Do syscall tracing: entered from system_call when any
      * _TIF_WORK_SYSCALL_ENTRY flag is set (%rcx = thread info). If audit
      * is the only entry work, divert to the lighter auditsys path.
      * Otherwise build a full frame, call syscall_trace_enter, and dispatch
      * using the syscall number it returns. Always leaves via the IRET
      * path that follows.
      */
  440tracesys:                        
  441#ifdef CONFIG_AUDITSYSCALL
  442        testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  443        jz auditsys
  444#endif
  445        SAVE_REST
  446        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  447        FIXUP_TOP_OF_STACK %rdi
  448        movq %rsp,%rdi                  /* arg1: frame base */
  449        call syscall_trace_enter
  450        /*
  451         * Reload arg registers from stack in case ptrace changed them.
  452         * We don't reload %rax because syscall_trace_enter() returned
  453         * the value it wants us to use in the table lookup.
  454         */
  455        LOAD_ARGS ARGOFFSET, 1
  456        RESTORE_REST
  457        cmpq $__NR_syscall_max,%rax
  458        ja   int_ret_from_sys_call      /* RAX(%rsp) set to -ENOSYS above */
  459        movq %r10,%rcx  /* fixup for C */
  460        call *sys_call_table(,%rax,8)
  461        movq %rax,RAX-ARGOFFSET(%rsp)
  462        /* Use IRET because user could have changed frame */
 463                
  464/*
  465 * Syscall return path ending with IRET.
  466 * Has correct top of stack, but partial stack frame.
  467 *
      * int_with_check is the loop head of this path: it is re-entered (with
      * interrupts off and %edi = work mask) after each piece of pending work
      * has been handled. A frame whose saved CS has RPL 0 skips the work
      * checks entirely and goes to retint_restore_args.
      */
  468        .globl int_ret_from_sys_call
  469        .globl int_with_check
  470int_ret_from_sys_call:
  471        DISABLE_INTERRUPTS(CLBR_NONE)
  472        TRACE_IRQS_OFF
  473        testl $3,CS-ARGOFFSET(%rsp)
  474        je retint_restore_args
  475        movl $_TIF_ALLWORK_MASK,%edi
  476        /* edi: mask to check */
  477int_with_check:
  478        LOCKDEP_SYS_EXIT_IRQ
  479        GET_THREAD_INFO(%rcx)
  480        movl TI_flags(%rcx),%edx
  481        andl %edi,%edx                  /* edx = pending work selected by the mask */
  482        jnz   int_careful
  483        andl    $~TS_COMPAT,TI_status(%rcx)     /* no work left: clear TS_COMPAT before leaving */
  484        jmp   retint_swapgs
 485
  486        /* Either reschedule or signal or syscall exit tracking needed. */
  487        /* First do a reschedule test. */
  488        /* edx: work, edi: workmask */
     /*
      * If TIF_NEED_RESCHED is pending, call schedule() with interrupts on,
      * then disable them again and re-run int_with_check. %rdi (the work
      * mask) is preserved across the call by push/pop. Any other work goes
      * to int_very_careful.
      */
  489int_careful:
  490        bt $TIF_NEED_RESCHED,%edx
  491        jnc  int_very_careful
  492        TRACE_IRQS_ON
  493        ENABLE_INTERRUPTS(CLBR_NONE)
  494        pushq %rdi
  495        CFI_ADJUST_CFA_OFFSET 8
  496        call schedule
  497        popq %rdi
  498        CFI_ADJUST_CFA_OFFSET -8
  499        DISABLE_INTERRUPTS(CLBR_NONE)
  500        TRACE_IRQS_OFF
  501        jmp int_with_check
 502
  503        /* handle signals and tracing -- both require a full stack frame */
  504int_very_careful:
  505        TRACE_IRQS_ON
  506        ENABLE_INTERRUPTS(CLBR_NONE)
  507        SAVE_REST
  508        /* Check for syscall exit trace */      
  509        testl $_TIF_WORK_SYSCALL_EXIT,%edx
  510        jz int_signal
  511        pushq %rdi                      /* keep the work mask across the call */
  512        CFI_ADJUST_CFA_OFFSET 8
  513        leaq 8(%rsp),%rdi       # &ptregs -> arg1       
  514        call syscall_trace_leave
  515        popq %rdi
  516        CFI_ADJUST_CFA_OFFSET -8
  517        andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi   /* drop these bits from the mask so the next pass does not trace again */
  518        jmp int_restore_rest
 519        
  520int_signal:
     /*
      * Full frame is on the stack (SAVE_REST done in int_very_careful).
      * Calls do_notify_resume if any _TIF_DO_NOTIFY_MASK bit is pending,
      * then narrows the mask to _TIF_WORK_MASK, pops the extra registers
      * and loops back to int_with_check with interrupts off.
      */
  521        testl $_TIF_DO_NOTIFY_MASK,%edx
  522        jz 1f
  523        movq %rsp,%rdi          # &ptregs -> arg1
  524        xorl %esi,%esi          # oldset -> arg2
  525        call do_notify_resume
  5261:      movl $_TIF_WORK_MASK,%edi
  527int_restore_rest:
  528        RESTORE_REST
  529        DISABLE_INTERRUPTS(CLBR_NONE)
  530        TRACE_IRQS_OFF
  531        jmp int_with_check
  532        CFI_ENDPROC
  533END(system_call)
 534                
 535/* 
 536 * Certain special system calls that need to save a complete full stack frame.
 537 */                                                             
 538        
  539        .macro PTREGSCALL label,func,arg
     /*
      * Defines a global stub "label" that loads the address of "func" into
      * %rax, loads into register "arg" the address -ARGOFFSET+8(%rsp)
      * (pt_regs base; +8 skips the stub's return address), and tail-jumps
      * to ptregscall_common, which performs the call. "arg" selects which
      * C argument register receives the pointer.
      */
  540        .globl \label
  541\label:
  542        leaq    \func(%rip),%rax
  543        leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  544        jmp     ptregscall_common
  545END(\label)
  546        .endm
  547
  548        CFI_STARTPROC                   /* this procedure is closed by the CFI_ENDPROC in ptregscall_common */
  549
  550        PTREGSCALL stub_clone, sys_clone, %r8
  551        PTREGSCALL stub_fork, sys_fork, %rdi
  552        PTREGSCALL stub_vfork, sys_vfork, %rdi
  553        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  554        PTREGSCALL stub_iopl, sys_iopl, %rsi
 555
  556ENTRY(ptregscall_common)
     /*
      * In: %rax = function to call; the top of stack is a return address.
      * Pops that return address so the stack is at the partial-frame base,
      * completes the frame (SAVE_REST + FIXUP_TOP_OF_STACK), calls *%rax,
      * syncs the SYSRET state back, restores the extra registers and returns
      * to the popped address. The return address is parked in %r15 across
      * the call. %r11 is used as scratch.
      * NOTE(review): this relies on SAVE_REST having stored the previous
      * %r15 before it is overwritten - confirm in asm/calling.h.
      */
  557        popq %r11
  558        CFI_ADJUST_CFA_OFFSET -8
  559        CFI_REGISTER rip, r11
  560        SAVE_REST
  561        movq %r11, %r15
  562        CFI_REGISTER rip, r15
  563        FIXUP_TOP_OF_STACK %r11
  564        call *%rax
  565        RESTORE_TOP_OF_STACK %r11
  566        movq %r15, %r11
  567        CFI_REGISTER rip, r11
  568        RESTORE_REST
  569        pushq %r11                      /* put the return address back for ret */
  570        CFI_ADJUST_CFA_OFFSET 8
  571        CFI_REL_OFFSET rip, 0
  572        ret
  573        CFI_ENDPROC
  574END(ptregscall_common)
 575        
  576ENTRY(stub_execve)
     /*
      * Stub for sys_execve. The return address is popped and never pushed
      * back; the stub always leaves through int_ret_from_sys_call (IRET
      * path). Builds a full frame, passes its base in %rcx (fourth C
      * argument), stores the result in the RAX slot.
      */
  577        CFI_STARTPROC
  578        popq %r11                       /* discard the return address */
  579        CFI_ADJUST_CFA_OFFSET -8
  580        CFI_REGISTER rip, r11
  581        SAVE_REST
  582        FIXUP_TOP_OF_STACK %r11
  583        movq %rsp, %rcx                 /* frame base -> 4th argument */
  584        call sys_execve
  585        RESTORE_TOP_OF_STACK %r11
  586        movq %rax,RAX(%rsp)
  587        RESTORE_REST
  588        jmp int_ret_from_sys_call
  589        CFI_ENDPROC
  590END(stub_execve)
 591        
  592/*
  593 * sigreturn is special because it needs to restore all registers on return.
  594 * This cannot be done with SYSRET, so use the IRET return path instead.
  595 *
      * The stub drops its own return address, builds a full frame, passes the
      * frame base to sys_rt_sigreturn in %rdi and stores the result in the
      * RAX slot before jumping to int_ret_from_sys_call.
      */
  596ENTRY(stub_rt_sigreturn)
  597        CFI_STARTPROC
  598        addq $8, %rsp                   /* drop the return address */
  599        CFI_ADJUST_CFA_OFFSET   -8
  600        SAVE_REST
  601        movq %rsp,%rdi                  /* arg1: frame base */
  602        FIXUP_TOP_OF_STACK %r11
  603        call sys_rt_sigreturn
  604        movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  605        RESTORE_REST
  606        jmp int_ret_from_sys_call
  607        CFI_ENDPROC
  608END(stub_rt_sigreturn)
 609
  610/*
  611 * initial frame state for interrupts and exceptions
  612 *
      * _frame opens a "simple" signal-frame CFI procedure whose CFA is
      * %rsp+SS+8-ref and declares rsp and rip saved at their frame slots
      * relative to "ref". "ref" is the offset of the slot %rsp points at on
      * entry (RIP or ORIG_RAX in the two wrappers defined after the macro).
      * Generates no instructions.
      */
  613        .macro _frame ref
  614        CFI_STARTPROC simple
  615        CFI_SIGNAL_FRAME
  616        CFI_DEF_CFA rsp,SS+8-\ref
  617        /*CFI_REL_OFFSET ss,SS-\ref*/
  618        CFI_REL_OFFSET rsp,RSP-\ref
  619        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  620        /*CFI_REL_OFFSET cs,CS-\ref*/
  621        CFI_REL_OFFSET rip,RIP-\ref
  622        .endm
  623
  624/* initial frame state for interrupts (and exceptions without error code) */
  625#define INTR_FRAME _frame RIP
  626/* initial frame state for exceptions with error code (and interrupts with
  627   vector already pushed) */
  628#define XCPT_FRAME _frame ORIG_RAX
 629
 630/* 
 631 * Interrupt entry/exit.
 632 *
 633 * Interrupt entry points save only callee clobbered registers in fast path.
 634 *      
 635 * Entry runs with interrupts off.      
 636 */ 
 637
  638/* 0(%rsp): interrupt number */
     /*
      * Common interrupt prologue followed by "call func".
      *  - saves the argument registers and passes the pt_regs base in %rdi;
      *  - sets up %rbp as a frame pointer to the interrupted stack;
      *  - does SWAPGS when the saved CS has a non-zero RPL;
      *  - bumps %gs:pda_irqcount and, only when the increment yields zero,
      *    switches %rsp to %gs:pda_irqstackptr.
      * The code following the macro is expected to undo the stack switch
      * (see leaveq in ret_from_intr).
      */
  639        .macro interrupt func
  640        cld
  641        SAVE_ARGS
  642        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
  643        pushq %rbp
  644        /*
  645         * Save rbp twice: One is for marking the stack frame, as usual, and the
  646         * other, to fill pt_regs properly. This is because bx comes right
  647         * before the last saved register in that structure, and not bp. If the
  648         * base pointer were in the place bx is today, this would not be needed.
  649         */
  650        movq %rbp, -8(%rsp)
  651        CFI_ADJUST_CFA_OFFSET   8
  652        CFI_REL_OFFSET          rbp, 0
  653        movq %rsp,%rbp
  654        CFI_DEF_CFA_REGISTER    rbp
  655        testl $3,CS(%rdi)
  656        je 1f                           /* RPL 0 in saved CS: skip SWAPGS */
  657        SWAPGS
  658        /* irqcount is used to check if a CPU is already on an interrupt
  659           stack or not. While this is essentially redundant with preempt_count
  660           it is a little cheaper to use a separate counter in the PDA
  661           (short of moving irq_enter into assembly, which would be too
  662            much work) */
  6631:      incl    %gs:pda_irqcount
  664        cmoveq %gs:pda_irqstackptr,%rsp /* uses ZF from the incl: switch stacks only if the count became 0 */
  665        push    %rbp                    # backlink for old unwinder
  666        /*
  667         * We entered an interrupt context - irqs are off:
  668         */
  669        TRACE_IRQS_OFF
  670        call \func
  671        .endm
 672
  673ENTRY(common_interrupt)
     /*
      * Device interrupt entry: runs the "interrupt" prologue with do_IRQ as
      * the handler, then falls into the shared return path. ret_from_intr
      * is also the jump target of the apicinterrupt entries in this file.
      */
  674        XCPT_FRAME
  675        interrupt do_IRQ
  676        /* 0(%rsp): oldrsp-ARGOFFSET */
  677ret_from_intr:
  678        DISABLE_INTERRUPTS(CLBR_NONE)
  679        TRACE_IRQS_OFF
  680        decl %gs:pda_irqcount           /* balances the incl in the interrupt macro */
  681        leaveq                          /* %rsp <- %rbp, pop %rbp: back on the interrupted stack */
  682        CFI_DEF_CFA_REGISTER    rsp
  683        CFI_ADJUST_CFA_OFFSET   -8
  684exit_intr:
  685        GET_THREAD_INFO(%rcx)
  686        testl $3,CS-ARGOFFSET(%rsp)
  687        je retint_kernel                /* saved CS has RPL 0: interrupted kernel code */
 688        
  689        /* Interrupt came from user space */
  690        /*
  691         * Has a correct top of stack, but a partial stack frame
  692         * %rcx: thread info. Interrupts off.
  693         *
          * retint_check is the loop head: %edi = mask of TI_flags bits that
          * count as pending work, %edx = the pending subset. It falls
          * through to retint_swapgs when nothing is pending.
          */
  694retint_with_reschedule:
  695        movl $_TIF_WORK_MASK,%edi
  696retint_check:
  697        LOCKDEP_SYS_EXIT_IRQ
  698        movl TI_flags(%rcx),%edx
  699        andl %edi,%edx
  700        CFI_REMEMBER_STATE
  701        jnz  retint_careful
 702
  703retint_swapgs:          /* return to user-space */
  704        /*
  705         * The iretq could re-enable interrupts:
  706         */
  707        DISABLE_INTERRUPTS(CLBR_ANY)
  708        TRACE_IRQS_IRETQ
  709        SWAPGS                          /* switch GS before returning to user mode */
  710        jmp restore_args
  711
  712retint_restore_args:    /* return to kernel space */
  713        DISABLE_INTERRUPTS(CLBR_ANY)
  714        /*
  715         * The iretq could re-enable interrupts:
  716         */
  717        TRACE_IRQS_IRETQ
  718restore_args:
  719        RESTORE_ARGS 0,8,0
  720
  721irq_return:
  722        INTERRUPT_RETURN
  723
     /*
      * Exception-table entry pairing irq_return with bad_iret.
      * NOTE(review): presumably a fault raised at irq_return resumes at
      * bad_iret - confirm against the kernel's __ex_table handling.
      */
  724        .section __ex_table, "a"
  725        .quad irq_return, bad_iret
  726        .previous
 727
  728#ifdef CONFIG_PARAVIRT
  729ENTRY(native_iret)
     /*
      * A bare iretq with its own exception-table entry, pairing it with
      * bad_iret in the same way as irq_return.
      * NOTE(review): presumably the bare-metal implementation behind
      * INTERRUPT_RETURN when paravirt is enabled - confirm in asm/paravirt.h.
      */
  730        iretq
  731
  732        .section __ex_table,"a"
  733        .quad native_iret, bad_iret
  734        .previous
  735#endif
 736
  737        .section .fixup,"ax"
  738bad_iret:
  739        /*
  740         * The iret traps when the %cs or %ss being restored is bogus.
  741         * We've lost the original trap vector and error code.
  742         * #GPF is the most likely one to get for an invalid selector.
  743         * So pretend we completed the iret and took the #GPF in user mode.
  744         *
  745         * We are now running with the kernel GS after exception recovery.
  746         * But error_entry expects us to have user GS to match the user %cs,
  747         * so swap back.
  748         */
  749        pushq $0                        /* NOTE(review): presumably a zero error code for general_protection - confirm */
  750
  751        SWAPGS
  752        jmp general_protection
  753
  754        .previous
 755
  756        /* edi: workmask, edx: work */
     /*
      * Pending work on return to user mode. If TIF_NEED_RESCHED is set, call
      * schedule() with interrupts on, reload the thread info pointer, and
      * re-run retint_check with interrupts off. Otherwise go to
      * retint_signal. %rdi (the work mask) is preserved by push/pop.
      */
  757retint_careful:
  758        CFI_RESTORE_STATE
  759        bt    $TIF_NEED_RESCHED,%edx
  760        jnc   retint_signal
  761        TRACE_IRQS_ON
  762        ENABLE_INTERRUPTS(CLBR_NONE)
  763        pushq %rdi
  764        CFI_ADJUST_CFA_OFFSET   8
  765        call  schedule
  766        popq %rdi               
  767        CFI_ADJUST_CFA_OFFSET   -8
  768        GET_THREAD_INFO(%rcx)           /* %rcx is call-clobbered; retint_check reads TI_flags through it */
  769        DISABLE_INTERRUPTS(CLBR_NONE)
  770        TRACE_IRQS_OFF
  771        jmp retint_check
 772        
  773retint_signal:
     /*
      * %edx = pending work. If no _TIF_DO_NOTIFY_MASK bit is set, return to
      * user mode at once. Otherwise build a full frame, mark ORIG_RAX as -1,
      * call do_notify_resume(&pt_regs, 0, work) and restart the check from
      * retint_with_reschedule.
      */
  774        testl $_TIF_DO_NOTIFY_MASK,%edx
  775        jz    retint_swapgs
  776        TRACE_IRQS_ON
  777        ENABLE_INTERRUPTS(CLBR_NONE)
  778        SAVE_REST
  779        movq $-1,ORIG_RAX(%rsp)                         
  780        xorl %esi,%esi          # oldset
  781        movq %rsp,%rdi          # &pt_regs
  782        call do_notify_resume
  783        RESTORE_REST
  784        DISABLE_INTERRUPTS(CLBR_NONE)
  785        TRACE_IRQS_OFF
  786        GET_THREAD_INFO(%rcx)           /* reload: retint_check expects thread info in %rcx */
  787        jmp retint_with_reschedule
 788
  789#ifdef CONFIG_PREEMPT
  790        /* Returning to kernel space. Check if we need preemption */
  791        /* rcx:  threadinfo. interrupts off. */
     /*
      * preempt_schedule_irq is called only when all three hold: the preempt
      * count is zero, TIF_NEED_RESCHED is set, and the interrupted context
      * had interrupts enabled (bit 9 of the saved EFLAGS). Afterwards the
      * whole check restarts from exit_intr. In every other case the return
      * proceeds via retint_restore_args.
      */
  792ENTRY(retint_kernel)
  793        cmpl $0,TI_preempt_count(%rcx)
  794        jnz  retint_restore_args
  795        bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
  796        jnc  retint_restore_args
  797        bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
  798        jnc  retint_restore_args
  799        call preempt_schedule_irq
  800        jmp exit_intr
  801#endif  
  802
  803        CFI_ENDPROC
  804END(common_interrupt)
 805        
  806/*
  807 * APIC interrupts.
  808 *
      * apicinterrupt emits a complete entry body: it pushes the one's
      * complement of the vector number "num" into the slot the "interrupt"
      * macro documents as the interrupt number, runs the common prologue
      * with "func" as the handler, and joins the shared return path at
      * ret_from_intr. The ENTRY/END pair is supplied by the user.
      */
  809        .macro apicinterrupt num,func
  810        INTR_FRAME
  811        pushq $~(\num)
  812        CFI_ADJUST_CFA_OFFSET 8
  813        interrupt \func
  814        jmp ret_from_intr
  815        CFI_ENDPROC
  816        .endm
 817
  818ENTRY(thermal_interrupt)
     /* Entry for THERMAL_APIC_VECTOR; the handler is smp_thermal_interrupt. */
  819        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  820END(thermal_interrupt)
  821
     /* Entry for THRESHOLD_APIC_VECTOR; the handler is mce_threshold_interrupt. */
  822ENTRY(threshold_interrupt)
  823        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  824END(threshold_interrupt)
 825
 826#ifdef CONFIG_SMP       
  827ENTRY(reschedule_interrupt)
     /* SMP-only entries. Each is an apicinterrupt instance binding one vector to its C handler. */
  828        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  829END(reschedule_interrupt)
  830
     /*
      * INVALIDATE_ENTRY n defines invalidate_interrupt<n> for vector
      * INVALIDATE_TLB_VECTOR_START+n. All eight instances share the handler
      * smp_invalidate_interrupt.
      */
  831        .macro INVALIDATE_ENTRY num
  832ENTRY(invalidate_interrupt\num)
  833        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 
  834END(invalidate_interrupt\num)
  835        .endm
  836
  837        INVALIDATE_ENTRY 0
  838        INVALIDATE_ENTRY 1
  839        INVALIDATE_ENTRY 2
  840        INVALIDATE_ENTRY 3
  841        INVALIDATE_ENTRY 4
  842        INVALIDATE_ENTRY 5
  843        INVALIDATE_ENTRY 6
  844        INVALIDATE_ENTRY 7
  845
  846ENTRY(call_function_interrupt)
  847        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  848END(call_function_interrupt)
  849ENTRY(call_function_single_interrupt)
  850        apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
  851END(call_function_single_interrupt)
  852ENTRY(irq_move_cleanup_interrupt)
  853        apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  854END(irq_move_cleanup_interrupt)
 855#endif
 856
  857ENTRY(apic_timer_interrupt)
     /* Entries assembled regardless of CONFIG_SMP. Each is an apicinterrupt instance binding one vector to its C handler. */
  858        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  859END(apic_timer_interrupt)
  860
  861ENTRY(uv_bau_message_intr1)
  862        apicinterrupt 220,uv_bau_message_interrupt      /* NOTE(review): literal vector 220 has no symbolic name in this file - consider a named constant */
  863END(uv_bau_message_intr1)
  864
  865ENTRY(error_interrupt)
  866        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  867END(error_interrupt)
  868
  869ENTRY(spurious_interrupt)
  870        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  871END(spurious_interrupt)
 872                                
  873/*
  874 * Exception entry points.
  875 *
      * zeroentry: body for exceptions that arrive without an error code.
      * Pushes a zero in its place and saves %rax on the stack. Then loads
      * the address of handler "sym" into %rax and jumps to error_entry.
      * The ENTRY/END pair is supplied by the user.
      */
  876        .macro zeroentry sym
  877        INTR_FRAME
  878        PARAVIRT_ADJUST_EXCEPTION_FRAME
  879        pushq $0        /* push error code/oldrax */
  880        CFI_ADJUST_CFA_OFFSET 8
  881        pushq %rax      /* push real oldrax to the rdi slot */
  882        CFI_ADJUST_CFA_OFFSET 8
  883        CFI_REL_OFFSET rax,0
  884        leaq  \sym(%rip),%rax
  885        jmp error_entry
  886        CFI_ENDPROC
  887        .endm   
 888
  889        .macro errorentry sym
     /*
      * Body for exceptions whose error code is already on the stack (frame
      * described by XCPT_FRAME). Saves %rax on the stack, loads the address
      * of handler "sym" into %rax and jumps to error_entry. Differs from
      * zeroentry only in not pushing a zero error code.
      */
  890        XCPT_FRAME
  891        PARAVIRT_ADJUST_EXCEPTION_FRAME
  892        pushq %rax
  893        CFI_ADJUST_CFA_OFFSET 8
  894        CFI_REL_OFFSET rax,0
  895        leaq  \sym(%rip),%rax
  896        jmp error_entry
  897        CFI_ENDPROC
  898        .endm
 899
  900        /* error code is on the stack already */
  901        /* handle NMI like exceptions that can happen everywhere */
     /*
      * Arguments:
      *   sym      - C handler, called with %rdi = frame base and %rsi = the
      *              value taken from the ORIG_RAX slot (slot is then set
      *              to -1).
      *   ist      - if non-zero, the TSS IST slot (ist - 1) is lowered by
      *              EXCEPTION_STKSZ around the call and raised again after.
      *              NOTE(review): presumably so a nested exception on the
      *              same IST gets a separate stack - confirm.
      *   irqtrace - if non-zero, emit TRACE_IRQS_OFF before and after.
      * Out: %ebx = 1 if SWAPGS was NOT executed here, 0 if it was. The
      * paranoidexit macro uses this to decide whether to swap back.
      * GS is identified by reading MSR_GS_BASE. A base whose upper half has
      * the sign bit set is treated as already being the kernel one.
      */
  902        .macro paranoidentry sym, ist=0, irqtrace=1
  903        SAVE_ALL
  904        cld
  905        movl $1,%ebx                    /* default: no swapgs done */
  906        movl  $MSR_GS_BASE,%ecx
  907        rdmsr                           /* %edx:%eax = MSR selected by %ecx */
  908        testl %edx,%edx
  909        js    1f                        /* sign bit set in the high half: keep current GS */
  910        SWAPGS
  911        xorl  %ebx,%ebx                 /* record that swapgs was done */
  9121:
  913        .if \ist
  914        movq    %gs:pda_data_offset, %rbp       /* base used for the per_cpu__init_tss accesses; %rbp survives the call */
  915        .endif
  916        .if \irqtrace
  917        TRACE_IRQS_OFF
  918        .endif
  919        movq %rsp,%rdi
  920        movq ORIG_RAX(%rsp),%rsi
  921        movq $-1,ORIG_RAX(%rsp)
  922        .if \ist
  923        subq    $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  924        .endif
  925        call \sym
  926        .if \ist
  927        addq    $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  928        .endif
  929        DISABLE_INTERRUPTS(CLBR_NONE)
  930        .if \irqtrace
  931        TRACE_IRQS_OFF
  932        .endif
  933        .endm
 934
  935        /*
  936         * "Paranoid" exit path from exception stack.
  937         * Paranoid because this is used by NMIs and cannot take
  938         * any kernel state for granted.
  939         * We don't do kernel preemption checks here, because only
  940         * NMI should be common and it does not enable IRQs and
  941         * cannot get reschedule ticks.
  942         *
  943         * "trace" is 0 for the NMI handler only, because irq-tracing
  944         * is fundamentally NMI-unsafe. (we cannot change the soft and
  945         * hard flags at once, atomically)
  946         *
          * The value of "trace" is also appended to every label, so the
          * trace=0 and trace=1 instances can coexist in one file.
          * Flow, given %ebx from paranoidentry:
          *  - %ebx != 0: restore registers and return, with no SWAPGS;
          *  - saved CS has RPL 0: SWAPGS, restore, return;
          *  - otherwise (user mode): loop handling _TIF_WORK_MASK work on
          *    the stack returned by sync_regs, then SWAPGS, restore, return.
          * The macro ends with CFI_ENDPROC and therefore closes a CFI
          * procedure opened by the code that precedes its use.
          */
  947        .macro paranoidexit trace=1
  948        /* ebx: no swapgs flag */
  949paranoid_exit\trace:
  950        testl %ebx,%ebx                         /* swapgs needed? */
  951        jnz paranoid_restore\trace
  952        testl $3,CS(%rsp)
  953        jnz   paranoid_userspace\trace
  954paranoid_swapgs\trace:
  955        .if \trace
  956        TRACE_IRQS_IRETQ 0
  957        .endif
  958        SWAPGS_UNSAFE_STACK
  959paranoid_restore\trace:
  960        RESTORE_ALL 8
  961        jmp irq_return
  962paranoid_userspace\trace:
  963        GET_THREAD_INFO(%rcx)
  964        movl TI_flags(%rcx),%ebx        /* %ebx is reused for the work flags: it is callee-saved, so it survives the calls that follow */
  965        andl $_TIF_WORK_MASK,%ebx
  966        jz paranoid_swapgs\trace
  967        movq %rsp,%rdi                  /* &pt_regs */
  968        call sync_regs
  969        movq %rax,%rsp                  /* switch stack for scheduling */
  970        testl $_TIF_NEED_RESCHED,%ebx
  971        jnz paranoid_schedule\trace
  972        movl %ebx,%edx                  /* arg3: thread flags */
  973        .if \trace
  974        TRACE_IRQS_ON
  975        .endif
  976        ENABLE_INTERRUPTS(CLBR_NONE)
  977        xorl %esi,%esi                  /* arg2: oldset */
  978        movq %rsp,%rdi                  /* arg1: &pt_regs */
  979        call do_notify_resume
  980        DISABLE_INTERRUPTS(CLBR_NONE)
  981        .if \trace
  982        TRACE_IRQS_OFF
  983        .endif
  984        jmp paranoid_userspace\trace    /* re-read the flags: more work may have arrived */
  985paranoid_schedule\trace:
  986        .if \trace
  987        TRACE_IRQS_ON
  988        .endif
  989        ENABLE_INTERRUPTS(CLBR_ANY)
  990        call schedule
  991        DISABLE_INTERRUPTS(CLBR_ANY)
  992        .if \trace
  993        TRACE_IRQS_OFF
  994        .endif
  995        jmp paranoid_userspace\trace
  996        CFI_ENDPROC
  997        .endm
 998
 999/*
1000 * Exception entry point. This expects an error code/orig_rax on the stack
1001 * and the exception handler in %rax.   
 *
 * The register frame is built by hand below, the handler is called with
 * %rdi = frame pointer and %rsi = error code, and control then falls into
 * error_exit.  %ebx carries the "no swapgs" flag across the handler call:
 * 0 when SWAPGS has to be executed on the way out, 1 when it must not.
1002 */                                             
1003KPROBE_ENTRY(error_entry)
1004        _frame RDI
1005        CFI_REL_OFFSET rax,0
1006        /* rdi slot contains rax, oldrax contains error code */
1007        cld     
1008        subq  $14*8,%rsp                /* 14 slots below the rdi slot: rsi down to r15 */
1009        CFI_ADJUST_CFA_OFFSET   (14*8)
1010        movq %rsi,13*8(%rsp)
1011        CFI_REL_OFFSET  rsi,RSI
1012        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
1013        CFI_REGISTER    rax,rsi
1014        movq %rdx,12*8(%rsp)
1015        CFI_REL_OFFSET  rdx,RDX
1016        movq %rcx,11*8(%rsp)
1017        CFI_REL_OFFSET  rcx,RCX
1018        movq %rsi,10*8(%rsp)    /* store rax */ 
1019        CFI_REL_OFFSET  rax,RAX
1020        movq %r8, 9*8(%rsp)
1021        CFI_REL_OFFSET  r8,R8
1022        movq %r9, 8*8(%rsp)
1023        CFI_REL_OFFSET  r9,R9
1024        movq %r10,7*8(%rsp)
1025        CFI_REL_OFFSET  r10,R10
1026        movq %r11,6*8(%rsp)
1027        CFI_REL_OFFSET  r11,R11
1028        movq %rbx,5*8(%rsp) 
1029        CFI_REL_OFFSET  rbx,RBX
1030        movq %rbp,4*8(%rsp) 
1031        CFI_REL_OFFSET  rbp,RBP
1032        movq %r12,3*8(%rsp) 
1033        CFI_REL_OFFSET  r12,R12
1034        movq %r13,2*8(%rsp) 
1035        CFI_REL_OFFSET  r13,R13
1036        movq %r14,1*8(%rsp) 
1037        CFI_REL_OFFSET  r14,R14
1038        movq %r15,(%rsp) 
1039        CFI_REL_OFFSET  r15,R15
1040        xorl %ebx,%ebx                  /* flag = 0: swapgs needed on exit */
1041        testl $3,CS(%rsp)               /* low two bits of the saved CS clear? */
1042        je  error_kernelspace           /* yes: came from kernel mode */
1043error_swapgs:   
1044        SWAPGS
1045error_sti:
1046        TRACE_IRQS_OFF
1047        movq %rdi,RDI(%rsp)             /* rdi is saved last; its slot held rax until now */
1048        CFI_REL_OFFSET  rdi,RDI
1049        movq %rsp,%rdi                  /* arg1: saved register frame */
1050        movq ORIG_RAX(%rsp),%rsi        /* get error code */ 
1051        movq $-1,ORIG_RAX(%rsp)         /* overwrite the slot with -1 */
1052        call *%rax                      /* handler address supplied by the entry stub */
1053        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1054error_exit:
1055        movl %ebx,%eax                  /* copy the flag before RESTORE_REST */
1056        RESTORE_REST
1057        DISABLE_INTERRUPTS(CLBR_NONE)
1058        TRACE_IRQS_OFF
1059        GET_THREAD_INFO(%rcx)   
1060        testl %eax,%eax
1061        jne  retint_kernel              /* flag set: take the kernel return path */
1062        LOCKDEP_SYS_EXIT_IRQ
1063        movl  TI_flags(%rcx),%edx
1064        movl  $_TIF_WORK_MASK,%edi
1065        andl  %edi,%edx
1066        jnz  retint_careful             /* work flags pending */
1067        jmp retint_swapgs
1068        CFI_ENDPROC
1069
1070error_kernelspace:
1071        incl %ebx                       /* flag = 1: no swapgs on exit */
1072       /* There are two places in the kernel that can potentially fault with
1073          usergs. Handle them here. The exception handlers after
1074           iret run with kernel gs again, so don't set the user space flag.
1075           B stepping K8s sometimes report a truncated RIP for IRET 
1076           exceptions returning to compat mode. Check for these here too. */
1077        leaq irq_return(%rip),%rcx
1078        cmpq %rcx,RIP(%rsp)             /* fault at irq_return? */
1079        je   error_swapgs
1080        movl %ecx,%ecx  /* zero extend */
1081        cmpq %rcx,RIP(%rsp)             /* same address with the upper 32 bits cleared? */
1082        je   error_swapgs
1083        cmpq $gs_change,RIP(%rsp)       /* fault at gs_change? */
1084        je   error_swapgs
1085        jmp  error_sti                  /* ordinary kernel fault: skip SWAPGS */
1086KPROBE_END(error_entry)
1087        
1088       /* Reload gs selector with exception handling */
1089       /* edi:  new selector */ 
       /*
        * The selector load at gs_change is bracketed by two SWAPGS and runs
        * with interrupts disabled (the saved flags are restored by popf).
        * A fault at gs_change is redirected to bad_gs through the
        * __ex_table entry below; bad_gs loads selector 0 instead and
        * resumes at label 2.
        */
1090ENTRY(native_load_gs_index)
1091        CFI_STARTPROC
1092        pushf                           /* saved flags, restored by popf below */
1093        CFI_ADJUST_CFA_OFFSET 8
1094        DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1095        SWAPGS
1096gs_change:     
1097        movl %edi,%gs                   /* the load that may fault */
10982:      mfence          /* workaround */
1099        SWAPGS
1100        popf
1101        CFI_ADJUST_CFA_OFFSET -8
1102        ret
1103        CFI_ENDPROC
1104ENDPROC(native_load_gs_index)
1105       
1106        .section __ex_table,"a"
1107        .align 8
1108        .quad gs_change,bad_gs          /* faulting address, fixup address */
1109        .previous
1110        .section .fixup,"ax"
1111        /* running with kernelgs */
1112bad_gs: 
1113        SWAPGS                  /* switch back to user gs */
1114        xorl %eax,%eax
1115        movl %eax,%gs                   /* load selector 0 instead of the bad one */
1116        jmp  2b                         /* rejoin the normal path after the load */
1117        .previous       
1118        
1119/*
1120 * Create a kernel thread.
1121 *
1122 * C extern interface:
1123 *      extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1124 *
1125 * asm input arguments:
1126 *      rdi: fn, rsi: arg, rdx: flags
 *
 * A fake stack frame whose return address is child_rip is built together
 * with a full register save, and do_fork is called with that frame as its
 * third argument.  The value returned by do_fork is written to the saved
 * RAX slot, so it is what RESTORE_ALL leaves in %rax for the caller.
1127 */
1128ENTRY(kernel_thread)
1129        CFI_STARTPROC
1130        FAKE_STACK_FRAME $child_rip
1131        SAVE_ALL
1132
1133        # rdi: flags, rsi: usp, rdx: will be &pt_regs
1134        movq %rdx,%rdi
1135        orq  kernel_thread_flags(%rip),%rdi     /* OR in kernel_thread_flags */
1136        movq $-1, %rsi
1137        movq %rsp, %rdx
1138
1139        xorl %r8d,%r8d                  /* fifth argument: 0 */
1140        xorl %r9d,%r9d                  /* sixth argument: 0 */
1141        
1142        # clone now
1143        call do_fork
1144        movq %rax,RAX(%rsp)             /* becomes the return value after RESTORE_ALL */
1145        xorl %edi,%edi
1146
1147        /*
1148         * It isn't worth checking for a reschedule here, so internally
1149         * to the x86_64 port you can rely on kernel_thread() not
1150         * rescheduling to the child before returning. This avoids the need
1151         * for hacks, for example to fork off the per-CPU idle tasks.
1152         * [Hopefully no generic code relies on the reschedule -AK]     
1153         */
1154        RESTORE_ALL
1155        UNFAKE_STACK_FRAME
1156        ret
1157        CFI_ENDPROC
1158ENDPROC(kernel_thread)
1159        
1160child_rip:
        /*
         * Start address placed in the fake frame by kernel_thread.
         * Calls fn(arg) and passes its 32-bit result to do_exit; no code
         * follows the do_exit call.
         */
1161        pushq $0                # fake return address
1162        CFI_STARTPROC
1163        /*
1164         * Here we are in the child and the registers are set as they were
1165         * at kernel_thread() invocation in the parent.
1166         */
1167        movq %rdi, %rax                 /* rax = fn */
1168        movq %rsi, %rdi                 /* arg1 = arg */
1169        call *%rax
1170        # exit
1171        mov %eax, %edi                  /* fn's result becomes do_exit's argument */
1172        call do_exit
1173        CFI_ENDPROC
1174ENDPROC(child_rip)
1175
1176/*
1177 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1178 *
1179 * C extern interface:
1180 *       extern long execve(char *name, char **argv, char **envp)
1181 *
1182 * asm input arguments:
1183 *      rdi: name, rsi: argv, rdx: envp
1184 *
1185 * We want to fallback into:
1186 *      extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1187 *
1188 * do_sys_execve asm fallback arguments:
1189 *      rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 *
 * When sys_execve returns 0 control leaves through int_ret_from_sys_call;
 * any other value is returned to the caller in %rax after the fake frame
 * has been removed.
1190 */
1191ENTRY(kernel_execve)
1192        CFI_STARTPROC
1193        FAKE_STACK_FRAME $0
1194        SAVE_ALL        
1195        movq %rsp,%rcx                  /* arg4: the frame just built */
1196        call sys_execve
1197        movq %rax, RAX(%rsp)            /* result into the saved RAX slot */
1198        RESTORE_REST
1199        testq %rax,%rax
1200        je int_ret_from_sys_call        /* result 0: leave via int_ret_from_sys_call */
1201        RESTORE_ARGS
1202        UNFAKE_STACK_FRAME
1203        ret                             /* non-zero result: plain return to the caller */
1204        CFI_ENDPROC
1205ENDPROC(kernel_execve)
1206
1207KPROBE_ENTRY(page_fault)
1208        errorentry do_page_fault        /* error code already on the stack; do_page_fault is called via error_entry */
1209KPROBE_END(page_fault)
1210
1211ENTRY(coprocessor_error)
1212        zeroentry do_coprocessor_error  /* zeroentry is defined earlier in this file; it hands the handler to error_entry */
1213END(coprocessor_error)
1214
1215ENTRY(simd_coprocessor_error)
1216        zeroentry do_simd_coprocessor_error     /* do_simd_coprocessor_error is called via error_entry */
1217END(simd_coprocessor_error)
1218
1219ENTRY(device_not_available)
1220        zeroentry do_device_not_available       /* do_device_not_available is called via error_entry */
1221END(device_not_available)
1222
1223        /* runs on exception stack */
1224KPROBE_ENTRY(debug)
1225        INTR_FRAME
1226        PARAVIRT_ADJUST_EXCEPTION_FRAME
1227        pushq $0                        /* fills the slot paranoidentry reads as the error code */
1228        CFI_ADJUST_CFA_OFFSET 8         
1229        paranoidentry do_debug, DEBUG_STACK     /* ist = DEBUG_STACK, irqtrace left at its default */
1230        paranoidexit                    /* trace defaults to 1: this expansion emits the paranoid_*1 labels */
1231KPROBE_END(debug)
1232
1233        /* runs on exception stack */   
1234KPROBE_ENTRY(nmi)
1235        INTR_FRAME
1236        PARAVIRT_ADJUST_EXCEPTION_FRAME
1237        pushq $-1                       /* fills the slot paranoidentry reads as the error code */
1238        CFI_ADJUST_CFA_OFFSET 8
1239        paranoidentry do_nmi, 0, 0      /* ist = 0, irqtrace = 0 */
1240#ifdef CONFIG_TRACE_IRQFLAGS
1241        paranoidexit 0                  /* private exit path without irq tracing (paranoid_*0 labels) */
1242#else
1243        jmp paranoid_exit1              /* share the exit path expanded in debug */
1244        CFI_ENDPROC
1245#endif
1246KPROBE_END(nmi)
1247
1248KPROBE_ENTRY(int3)
1249        INTR_FRAME
1250        PARAVIRT_ADJUST_EXCEPTION_FRAME
1251        pushq $0                        /* fills the slot paranoidentry reads as the error code */
1252        CFI_ADJUST_CFA_OFFSET 8
1253        paranoidentry do_int3, DEBUG_STACK      /* ist = DEBUG_STACK */
1254        jmp paranoid_exit1              /* share the exit path expanded in debug */
1255        CFI_ENDPROC
1256KPROBE_END(int3)
1257
1258ENTRY(overflow)
1259        zeroentry do_overflow           /* do_overflow is called via error_entry */
1260END(overflow)
1261
1262ENTRY(bounds)
1263        zeroentry do_bounds             /* do_bounds is called via error_entry */
1264END(bounds)
1265
1266ENTRY(invalid_op)
1267        zeroentry do_invalid_op         /* do_invalid_op is called via error_entry */
1268END(invalid_op)
1269
1270ENTRY(coprocessor_segment_overrun)
1271        zeroentry do_coprocessor_segment_overrun        /* handler is called via error_entry */
1272END(coprocessor_segment_overrun)
1273
1274        /* runs on exception stack */
1275ENTRY(double_fault)
1276        XCPT_FRAME
1277        PARAVIRT_ADJUST_EXCEPTION_FRAME
1278        paranoidentry do_double_fault   /* no dummy error code is pushed here, unlike debug/nmi/int3 */
1279        jmp paranoid_exit1              /* share the exit path expanded in debug */
1280        CFI_ENDPROC
1281END(double_fault)
1282
1283ENTRY(invalid_TSS)
1284        errorentry do_invalid_TSS       /* error code already on the stack; handler is called via error_entry */
1285END(invalid_TSS)
1286
1287ENTRY(segment_not_present)
1288        errorentry do_segment_not_present       /* error code already on the stack; handler is called via error_entry */
1289END(segment_not_present)
1290
1291        /* runs on exception stack */
1292ENTRY(stack_segment)
1293        XCPT_FRAME
1294        PARAVIRT_ADJUST_EXCEPTION_FRAME
1295        paranoidentry do_stack_segment  /* no dummy error code is pushed here, unlike debug/nmi/int3 */
1296        jmp paranoid_exit1              /* share the exit path expanded in debug */
1297        CFI_ENDPROC
1298END(stack_segment)
1299
1300KPROBE_ENTRY(general_protection)
1301        errorentry do_general_protection        /* error code already on the stack; handler is called via error_entry */
1302KPROBE_END(general_protection)
1303
1304ENTRY(alignment_check)
1305        errorentry do_alignment_check   /* error code already on the stack; handler is called via error_entry */
1306END(alignment_check)
1307
1308ENTRY(divide_error)
1309        zeroentry do_divide_error       /* do_divide_error is called via error_entry */
1310END(divide_error)
1311
1312ENTRY(spurious_interrupt_bug)
1313        zeroentry do_spurious_interrupt_bug     /* handler is called via error_entry */
1314END(spurious_interrupt_bug)
1315
1316#ifdef CONFIG_X86_MCE
1317        /* runs on exception stack */
1318ENTRY(machine_check)
1319        INTR_FRAME
1320        PARAVIRT_ADJUST_EXCEPTION_FRAME
1321        pushq $0                        /* fills the slot paranoidentry reads as the error code */
1322        CFI_ADJUST_CFA_OFFSET 8 
1323        paranoidentry do_machine_check  /* ist and irqtrace left at their defaults (0 and 1) */
1324        jmp paranoid_exit1              /* share the exit path expanded in debug */
1325        CFI_ENDPROC
1326END(machine_check)
1327#endif
1328
1329/* Call softirq on interrupt stack. Interrupts are off. */
/*
 * The per-CPU counter pda_irqcount is incremented; only when the result is
 * zero is %rsp switched to pda_irqstackptr.  The old stack pointer is kept
 * in %rbp and restored by leaveq after __do_softirq returns.
 */
1330ENTRY(call_softirq)
1331        CFI_STARTPROC
1332        push %rbp
1333        CFI_ADJUST_CFA_OFFSET   8
1334        CFI_REL_OFFSET rbp,0
1335        mov  %rsp,%rbp                  /* remember the caller's stack */
1336        CFI_DEF_CFA_REGISTER rbp
1337        incl %gs:pda_irqcount           /* sets ZF when the count becomes 0 */
1338        cmove %gs:pda_irqstackptr,%rsp  /* count became 0: switch to the interrupt stack */
1339        push  %rbp                      # backlink for old unwinder
1340        call __do_softirq
1341        leaveq                          /* rsp = rbp, then pop rbp: back on the caller's stack */
1342        CFI_DEF_CFA_REGISTER    rsp
1343        CFI_ADJUST_CFA_OFFSET   -8
1344        decl %gs:pda_irqcount           /* undo the increment above */
1345        ret
1346        CFI_ENDPROC
1347ENDPROC(call_softirq)
1348
1349KPROBE_ENTRY(ignore_sysret)
1350        CFI_STARTPROC
1351        mov $-ENOSYS,%eax               /* result reported to the caller: -ENOSYS */
1352        sysret                          /* return immediately, nothing is saved or restored */
1353        CFI_ENDPROC
        /*
         * Closed with KPROBE_END, like every other KPROBE_ENTRY in this
         * file, so that the section switch made by KPROBE_ENTRY is undone
         * and the code that follows is not placed in the kprobes section.
         */
1354KPROBE_END(ignore_sysret)
1355
1356#ifdef CONFIG_XEN
1357ENTRY(xen_hypervisor_callback)
1358        zeroentry xen_do_hypervisor_callback    /* xen_do_hypervisor_callback is called via error_entry */
1359END(xen_hypervisor_callback)
1360
1361/*
1362# A note on the "critical region" in our callback handler.
1363# We want to avoid stacking callback handlers due to events occurring
1364# during handling of the last event. To do this, we keep events disabled
1365# until we've done all processing. HOWEVER, we must enable events before
1366# popping the stack frame (can't be done atomically) and so it would still
1367# be possible to get enough handler activations to overflow the stack.
1368# Although unlikely, bugs of that kind are hard to track down, so we'd
1369# like to avoid the possibility.
1370# So, on entry to the handler we detect whether we interrupted an
1371# existing activation in its critical region -- if so, we pop the current
1372# activation and restart the handler using the previous one.
1373*/
1374ENTRY(xen_do_hypervisor_callback)   # xen_do_hypervisor_callback(struct *pt_regs)
1375        CFI_STARTPROC
1376/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct *pt_regs) will
1377   see the correct pointer to the pt_regs */
1378        movq %rdi, %rsp            # we don't return, adjust the stack frame
1379        CFI_ENDPROC
1380        CFI_DEFAULT_STACK
138111:     incl %gs:pda_irqcount           /* sets ZF when the count becomes 0 */
1382        movq %rsp,%rbp                  /* keep the frame pointer; mov leaves ZF untouched */
1383        CFI_DEF_CFA_REGISTER rbp
1384        cmovzq %gs:pda_irqstackptr,%rsp /* count became 0: switch to the interrupt stack */
1385        pushq %rbp                      # backlink for old unwinder
1386        call xen_evtchn_do_upcall
1387        popq %rsp                       /* back to the frame saved in the backlink */
1388        CFI_DEF_CFA_REGISTER rsp
1389        decl %gs:pda_irqcount           /* undo the increment above */
1390        jmp  error_exit
1391        CFI_ENDPROC
        /* END must name the symbol opened by ENTRY above. */
1392END(xen_do_hypervisor_callback)
1393
1394/*
1395# Hypervisor uses this for application faults while it executes.
1396# We get here for two reasons:
1397#  1. Fault while reloading DS, ES, FS or GS
1398#  2. Fault while executing IRET
1399# Category 1 we do not need to fix up as Xen has already reloaded all segment
1400# registers that could be reloaded and zeroed the others.
1401# Category 2 we fix up by killing the current process. We cannot use the
1402# normal Linux return path in this case because if we use the IRET hypercall
1403# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1404# We distinguish between categories by comparing each saved segment register
1405# with its current contents: any discrepancy means we are in category 1.
1406*/
1407ENTRY(xen_failsafe_callback)
        /*
         * Stack layout on entry, as used by the code below:
         *   0x00 rcx, 0x08 r11, 0x10 ds, 0x18 es, 0x20 fs, 0x28 gs,
         * followed at 0x30 by the rest of the frame.
         */
1408        framesz = (RIP-0x30)    /* workaround buggy gas */
1409        _frame framesz
1410        CFI_REL_OFFSET rcx, 0
1411        CFI_REL_OFFSET r11, 8
1412        movw %ds,%cx                    /* %cx is scratch; rcx is reloaded from the stack later */
1413        cmpw %cx,0x10(%rsp)             /* saved ds vs. current ds */
1414        CFI_REMEMBER_STATE
1415        jne 1f
1416        movw %es,%cx
1417        cmpw %cx,0x18(%rsp)             /* saved es vs. current es */
1418        jne 1f
1419        movw %fs,%cx
1420        cmpw %cx,0x20(%rsp)             /* saved fs vs. current fs */
1421        jne 1f
1422        movw %gs,%cx
1423        cmpw %cx,0x28(%rsp)             /* saved gs vs. current gs */
1424        jne 1f
1425        /* All segments match their saved values => Category 2 (Bad IRET). */
1426        movq (%rsp),%rcx
1427        CFI_RESTORE rcx
1428        movq 8(%rsp),%r11
1429        CFI_RESTORE r11
1430        addq $0x30,%rsp                 /* drop rcx, r11 and the four segment slots */
1431        CFI_ADJUST_CFA_OFFSET -0x30
1432        pushq $0                        /* value general_protection will see as the error code */
1433        CFI_ADJUST_CFA_OFFSET 8
1434        pushq %r11                      /* NOTE(review): r11/rcx are re-pushed, presumably in the */
1435        CFI_ADJUST_CFA_OFFSET 8
1436        pushq %rcx                      /* layout PARAVIRT_ADJUST_EXCEPTION_FRAME expects - confirm */
1437        CFI_ADJUST_CFA_OFFSET 8
1438        jmp general_protection
1439        CFI_RESTORE_STATE
14401:      /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1441        movq (%rsp),%rcx
1442        CFI_RESTORE rcx
1443        movq 8(%rsp),%r11
1444        CFI_RESTORE r11
1445        addq $0x30,%rsp                 /* drop rcx, r11 and the four segment slots */
1446        CFI_ADJUST_CFA_OFFSET -0x30
1447        pushq $0                        /* placeholder pushed below the remaining frame before SAVE_ALL */
1448        CFI_ADJUST_CFA_OFFSET 8
1449        SAVE_ALL
1450        jmp error_exit                  /* leave through the common exception exit path */
1451        CFI_ENDPROC
1452END(xen_failsafe_callback)
1453
1454#endif /* CONFIG_XEN */
1455
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.