/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame, this is
 * only done for syscall tracing, signals or fork/exec et.al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL
 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000

	.code64

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
/* Dynamic ftrace build: mcount itself is a stub that returns immediately. */
ENTRY(mcount)
	retq
END(mcount)

/*
 * ftrace_caller: save %rax, %rcx, %rdx, %rsi, %rdi, %r8 and %r9 in a
 * 0x38-byte stack area, call through the ftrace_call site, then restore
 * them and return.
 *
 * Arguments set up for the callee:
 *   %rdi = quadword just above the save area (the return address of the
 *          call that entered this routine) minus MCOUNT_INSN_SIZE
 *   %rsi = 8(%rbp) -- presumably the parent's return address; this
 *          assumes the traced code keeps a frame pointer (TODO confirm)
 */
ENTRY(ftrace_caller)

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	/*
	 * Global label on the call instruction itself. As assembled it
	 * targets ftrace_stub (a bare retq).
	 * NOTE(review): presumably rewritten at runtime by the dynamic
	 * tracer -- confirm against the ftrace C code.
	 */
.globl ftrace_call
ftrace_call:
	call ftrace_stub

	/* restore in reverse order of the saves above */
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* !
CONFIG_DYNAMIC_FTRACE */
/*
 * Static ftrace build: mcount compares ftrace_trace_function against
 * ftrace_stub and returns straight away when they are equal. Otherwise
 * it saves %rax, %rcx, %rdx, %rsi, %rdi, %r8 and %r9, calls the function
 * pointer with
 *   %rdi = return address of the call into mcount - MCOUNT_INSN_SIZE
 *   %rsi = 8(%rbp) -- presumably the parent's return address; assumes
 *          the traced code keeps a frame pointer (TODO confirm)
 * and restores the registers before returning through ftrace_stub.
 */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

/* Without kernel preemption, returning to kernel mode needs no extra checks. */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
/*
 * Native implementation of the "switch to user gs and sysret" operation:
 * swapgs followed by a 64-bit sysret. Does not return.
 * Closed with ENDPROC so the symbol gets a function type and a size, like
 * the other entry points in this file.
 */
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */


/*
 * TRACE_IRQS_IRETQ: when irq-flag tracing is configured, emit TRACE_IRQS_ON
 * if bit 9 of the saved EFLAGS slot is set, i.e. if the frame about to be
 * returned to has interrupts enabled. \offset is subtracted from the EFLAGS
 * slot offset. Expands to nothing without CONFIG_TRACE_IRQFLAGS.
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
	jnc 1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with an pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	/*
	 * Fill in the slots of the frame at %rsp that SYSCALL left undefined:
	 * RSP from %gs:pda_oldrsp, SS/CS with the user selectors, RCX with -1,
	 * and EFLAGS from the saved R11 slot. Clobbers \tmp.
	 */
	.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp /* get eflags */
	movq \tmp,EFLAGS(%rsp)
	.endm

	/*
	 * Inverse direction: copy the RSP slot back to %gs:pda_oldrsp and the
	 * EFLAGS slot back to the R11 slot. \offset is subtracted from every
	 * slot offset. Clobbers \tmp.
	 */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
	.endm

	/*
	 * Push six quadwords that mimic a hardware interrupt frame plus an
	 * orig_rax slot: kernel SS, a zero RSP, EFLAGS with bit 9 set, kernel
	 * CS, \child_rip as RIP, and zero as orig rax. Zeroes %rax.
	 */
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
	.endm

	/* Drop the six quadwords pushed by FAKE_STACK_FRAME. */
	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
	.endm

	/*
	 * Unwind annotations for a full register frame at %rsp. With
	 * \start non-zero this opens a new CFI procedure; with \start zero
	 * it only resets the CFA offset inside an existing one. Emits no
	 * instructions.
	 */
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
/*
 * Flow: reload the flags register from kernel_eflags, call schedule_tail
 * (with %rdi still holding prev), optionally report syscall exit to the
 * tracer, then leave through one of the syscall return paths:
 *  - frame came from kernel mode (CS low bits zero): IRET path
 *  - task has _TIF_IA32 set: IRET path
 *  - otherwise: sync the top of stack and take the SYSRET path
 */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf # reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	/* full frame is still on the stack here: pass it as &pt_regs */
	movq %rsp,%rdi
	call syscall_trace_leave
	/* reload thread info into %rcx for rff_action after the C call */
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax system call number
 * rdi arg0
 * rcx return address for syscall/sysret, C arg3
 * rsi arg1
 * rdx arg2
 * r10 arg3 (--> moved to rcx for C)
 * r8 arg4
 * r9 arg5
 * r11 eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack frame
 * and report it properly in ps. Unfortunately we haven't.
 *
 * When user can change the frames always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */

ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,PDA_STACKOFFSET
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	/* stash the user stack pointer in the PDA, switch to the kernel stack */
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	/* record the syscall number and the user return address in the frame */
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	/* unsigned compare: any number above the table maximum is rejected */
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx /* 4th argument: the C ABI wants it in rcx */
	call *sys_call_table(,%rax,8) # XXX: rip relative
	/* store the return value in the frame's RAX slot */
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
	/*
	 * Loop head: with interrupts disabled, test the thread flags against
	 * the mask in %edi. No work pending -> return to user with SYSRET.
	 */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	/* user return address goes back into %rcx for sysret */
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	/* back onto the user stack saved at entry */
	movq %gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	/* keep the work mask in %rdi across the C call */
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* syscall number out of range: return -ENOSYS through the fast path */
badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
411 */ 412auditsys: 413 movq %r10,%r9 /* 6th arg: 4th syscall arg */ 414 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ 415 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ 416 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ 417 movq %rax,%rsi /* 2nd arg: syscall number */ 418 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ 419 call audit_syscall_entry 420 LOAD_ARGS 0 /* reload call-clobbered registers */ 421 jmp system_call_fastpath 422 423 /* 424 * Return fast path for syscall audit. Call audit_syscall_exit() 425 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT 426 * masked off. 427 */ 428sysret_audit: 429 movq %rax,%rsi /* second arg, syscall return value */ 430 cmpq $0,%rax /* is it < 0? */ 431 setl %al /* 1 if so, 0 if not */ 432 movzbl %al,%edi /* zero-extend that into %edi */ 433 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 434 call audit_syscall_exit 435 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 436 jmp sysret_check 437#endif /* CONFIG_AUDITSYSCALL */ 438 439 /* Do syscall tracing */ 440tracesys: 441#ifdef CONFIG_AUDITSYSCALL 442 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 443 jz auditsys 444#endif 445 SAVE_REST 446 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 447 FIXUP_TOP_OF_STACK %rdi 448 movq %rsp,%rdi 449 call syscall_trace_enter 450 /* 451 * Reload arg registers from stack in case ptrace changed them. 452 * We don't reload %rax because syscall_trace_enter() returned 453 * the value it wants us to use in the table lookup. 454 */ 455 LOAD_ARGS ARGOFFSET, 1 456 RESTORE_REST 457 cmpq $__NR_syscall_max,%rax 458 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 459 movq %r10,%rcx /* fixup for C */ 460 call *sys_call_table(,%rax,8) 461 movq %rax,RAX-ARGOFFSET(%rsp) 462 /* Use IRET because user could have changed frame */ 463 464/* 465 * Syscall return path ending with IRET. 466 * Has correct top of stack, but partial stack frame. 
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	/* saved CS has privilege bits 0: returning to kernel mode, no work check */
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
	/* Loop head; entered with interrupts off and the work mask in %edi. */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	/* nothing pending: clear TS_COMPAT in the thread status and return */
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	/* keep the work mask in %rdi across the C call */
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	/* +8 skips the %rdi just pushed */
	leaq 8(%rsp),%rdi # &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	/* exit work has been handled: drop those bits from the mask */
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi # &ptregs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls that need to save a complete full stack frame.
 */

	/*
	 * PTREGSCALL label,func,arg: define global stub \label that loads the
	 * address of \func into %rax and a pointer to the register frame into
	 * \arg (the argument register in which \func expects its pt_regs
	 * pointer), then tail-jumps to ptregscall_common.
	 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
	jmp ptregscall_common
END(\label)
	.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

/*
 * In: %rax = function to call, return address on top of the stack.
 * Pops the return address so that SAVE_REST can complete the register
 * frame, parks the address in %r15 across the call, fixes up the top of
 * stack around the call, and finally pushes the address back to return.
 */
ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	/* %r11 is about to be used as scratch; keep the return address in %r15 */
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

/*
 * execve stub: discards its own return address, builds a full frame,
 * passes it to sys_execve as the fourth argument (%rcx) and always
 * leaves through the IRET path.
 */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	/* discard this stub's return address; it exits via the IRET path */
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	/*
	 * \ref names the frame slot %rsp points at when the macro is used;
	 * all unwind offsets are expressed relative to it. Emits no
	 * instructions.
	 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	/*
	 * interrupt func: save the argument registers, point %rdi at the
	 * register frame, set up %rbp as frame pointer, swapgs when the
	 * interrupt came from user mode, switch to the per-CPU interrupt
	 * stack when needed, and call \func.
	 */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
	pushq %rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq %rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	/* privilege bits of the saved CS: zero means we interrupted the kernel */
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not.
While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
	/*
	 * cmoveq acts on the zero flag left by the incl: the stack is only
	 * switched when the counter has just become 0.
	 */
1:	incl %gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push %rbp # backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	/* undo the frame set up by the interrupt macro (%rsp <- %rbp, pop %rbp) */
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful

retint_swapgs: /* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args: /* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	/* exception table entry: a fault at irq_return resumes at bad_iret */
	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	/* same fixup for the native iretq */
	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	/* fake error code of 0 for the general_protection entry point */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	/* keep the work mask in %rdi across the C call */
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	/* retint_check expects thread info in %rcx */
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* NOTE(review): -1 presumably marks "not inside a system call" -- confirm */
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi # oldset
	movq %rsp,%rdi # &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	/*
	 * Preempt only when all three hold: preempt count is zero, a
	 * reschedule is requested, and the interrupted frame had interrupts
	 * enabled (bit 9 of its saved EFLAGS).
	 */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	/*
	 * apicinterrupt num,func: body of an APIC vector entry point. Pushes
	 * the bitwise complement of the vector number into the slot where
	 * common_interrupt finds its interrupt number, runs \func through the
	 * interrupt macro and leaves via ret_from_intr.
	 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	/* Defines invalidate_interrupt<num> for vector INVALIDATE_TLB_VECTOR_START+num. */
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

	/* NOTE(review): 220 is a bare vector number, unlike the named vectors around it */
ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
	/*
	 * zeroentry sym: entry stub for exceptions where no error code is on
	 * the stack. Pushes a 0 in its place, saves %rax in the rdi slot,
	 * loads the address of \sym into %rax and jumps to error_entry.
	 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0 /* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax /* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/*
	 * errorentry sym: same as zeroentry, for exceptions where the error
	 * code is already on the stack (no dummy push).
	 */
	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	/*
	 * paranoidentry sym, ist, irqtrace:
	 *  - saves all registers
	 *  - decides whether swapgs is needed from the sign of the upper
	 *    half of MSR_GS_BASE, and leaves %ebx = 1 when no swapgs was
	 *    done, 0 when it was
	 *  - calls \sym with %rdi = &pt_regs and %rsi = error code, after
	 *    overwriting the ORIG_RAX slot with -1
	 *  - with a non-zero \ist, lowers the per-CPU TSS IST entry
	 *    (\ist - 1) by EXCEPTION_STKSZ around the call
	 *  - \irqtrace = 0 suppresses the TRACE_IRQS_OFF annotations
	 * Clobbers %rcx, %rdx and %rax (rdmsr), and %rbp when \ist is set.
	 */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	SWAPGS
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
	.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx /* swapgs needed?
*/
	jnz paranoid_restore\trace
	/* privilege bits of the saved CS: non-zero means return to user mode */
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
	/*
	 * Returning to user mode: loop until no _TIF_WORK_MASK bit is set.
	 * %ebx is reused here to hold the pending work bits.
	 */
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi /* &pt_regs */
	call sync_regs
	movq %rax,%rsp /* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx /* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi /* arg2: oldset */
	movq %rsp,%rdi /* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	/*
	 * Make room for the 14 remaining register slots (rsi down to r15)
	 * and fill them by hand. %rsi is saved first so it can serve as
	 * scratch for moving the original %rax out of the rdi slot.
	 */
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi /* load rax from rdi slot */
	CFI_REGISTER rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp) /* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	/* %ebx = 0: swapgs needed on exit, unless error_kernelspace changes it */
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	/* the rdi slot held the original rax until now; store the real %rdi */
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi /* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	/* copy the flag to %eax before RESTORE_REST reloads %rbx */
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here.
The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report an truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	/* second compare: same address with the upper 32 bits cleared */
	movl %ecx,%ecx /* zero extend */
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi: new selector */
	/*
	 * The flags are saved and restored around the sequence, so the
	 * caller's interrupt state is preserved. The selector load sits
	 * between two SWAPGS; a fault at gs_change is redirected to bad_gs
	 * through the exception table entry below.
	 */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence /* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS /* switch back to user gs */
	/* load the null selector instead, then resume after the faulting load */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 * rdi: fn, rsi: arg, rdx: flags
 *
 * Builds a fake interrupt frame whose RIP is child_rip, saves all
 * registers (so the frame carries fn in rdi and arg in rsi), and calls
 * do_fork with that frame as pt_regs. Returns do_fork's result in %rax.
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	/* 5th and 6th do_fork arguments are zero */
	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	/* store the result in the frame so RESTORE_ALL reloads it into %rax */
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth to check for reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning, this avoids the need
	 * of hacks for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

/*
 * First code run by the new thread: call fn(arg), then do_exit() with
 * fn's 32-bit return value.
 */
child_rip:
	pushq $0 # fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 * extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 * rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	/* zero return: leave through the IRET path using the frame on the stack */
	testq %rax,%rax
	je int_ret_from_sys_call
	/* non-zero return: unwind the fake frame and return the value to the caller */
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

/*
 * Exception entry points. Each one only selects the C handler and the
 * entry flavour: errorentry when an error code is already on the stack,
 * zeroentry when a dummy one has to be pushed, paranoidentry for the
 * handlers using the paranoid entry/exit sequence.
 */
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry do_device_not_available
END(device_not_available)

	/* runs on exception stack */
	/* The paranoidexit expansion here defines the shared paranoid_exit1 labels. */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
	/*
	 * With irq-flag tracing configured the NMI path gets its own
	 * untraced exit (paranoid_exit0); otherwise it shares paranoid_exit1.
	 */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off.
*/ 1330ENTRY(call_softirq) 1331 CFI_STARTPROC 1332 push %rbp 1333 CFI_ADJUST_CFA_OFFSET 8 1334 CFI_REL_OFFSET rbp,0 1335 mov %rsp,%rbp 1336 CFI_DEF_CFA_REGISTER rbp 1337 incl %gs:pda_irqcount 1338 cmove %gs:pda_irqstackptr,%rsp 1339 push %rbp # backlink for old unwinder 1340 call __do_softirq 1341 leaveq 1342 CFI_DEF_CFA_REGISTER rsp 1343 CFI_ADJUST_CFA_OFFSET -8 1344 decl %gs:pda_irqcount 1345 ret 1346 CFI_ENDPROC 1347ENDPROC(call_softirq) 1348 1349KPROBE_ENTRY(ignore_sysret) 1350 CFI_STARTPROC 1351 mov $-ENOSYS,%eax 1352 sysret 1353 CFI_ENDPROC 1354ENDPROC(ignore_sysret) 1355 1356#ifdef CONFIG_XEN 1357ENTRY(xen_hypervisor_callback) 1358 zeroentry xen_do_hypervisor_callback 1359END(xen_hypervisor_callback) 1360 1361/* 1362# A note on the "critical region" in our callback handler. 1363# We want to avoid stacking callback handlers due to events occurring 1364# during handling of the last event. To do this, we keep events disabled 1365# until we've done all processing. HOWEVER, we must enable events before 1366# popping the stack frame (can't be done atomically) and so it would still 1367# be possible to get enough handler activations to overflow the stack. 1368# Although unlikely, bugs of that kind are hard to track down, so we'd 1369# like to avoid the possibility. 1370# So, on entry to the handler we detect whether we interrupted an 1371# existing activation in its critical region -- if so, we pop the current 1372# activation and restart the handler using the previous one. 
1373*/ 1374ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 1375 CFI_STARTPROC 1376/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 1377 see the correct pointer to the pt_regs */ 1378 movq %rdi, %rsp # we don't return, adjust the stack frame 1379 CFI_ENDPROC 1380 CFI_DEFAULT_STACK 138111: incl %gs:pda_irqcount 1382 movq %rsp,%rbp 1383 CFI_DEF_CFA_REGISTER rbp 1384 cmovzq %gs:pda_irqstackptr,%rsp 1385 pushq %rbp # backlink for old unwinder 1386 call xen_evtchn_do_upcall 1387 popq %rsp 1388 CFI_DEF_CFA_REGISTER rsp 1389 decl %gs:pda_irqcount 1390 jmp error_exit 1391 CFI_ENDPROC 1392END(do_hypervisor_callback) 1393 1394/* 1395# Hypervisor uses this for application faults while it executes. 1396# We get here for two reasons: 1397# 1. Fault while reloading DS, ES, FS or GS 1398# 2. Fault while executing IRET 1399# Category 1 we do not need to fix up as Xen has already reloaded all segment 1400# registers that could be reloaded and zeroed the others. 1401# Category 2 we fix up by killing the current process. We cannot use the 1402# normal Linux return path in this case because if we use the IRET hypercall 1403# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1404# We distinguish between categories by comparing each saved segment register 1405# with its current contents: any discrepancy means we in category 1. 1406*/ 1407ENTRY(xen_failsafe_callback) 1408 framesz = (RIP-0x30) /* workaround buggy gas */ 1409 _frame framesz 1410 CFI_REL_OFFSET rcx, 0 1411 CFI_REL_OFFSET r11, 8 1412 movw %ds,%cx 1413 cmpw %cx,0x10(%rsp) 1414 CFI_REMEMBER_STATE 1415 jne 1f 1416 movw %es,%cx 1417 cmpw %cx,0x18(%rsp) 1418 jne 1f 1419 movw %fs,%cx 1420 cmpw %cx,0x20(%rsp) 1421 jne 1f 1422 movw %gs,%cx 1423 cmpw %cx,0x28(%rsp) 1424 jne 1f 1425 /* All segments match their saved values => Category 2 (Bad IRET). 
*/ 1426 movq (%rsp),%rcx 1427 CFI_RESTORE rcx 1428 movq 8(%rsp),%r11 1429 CFI_RESTORE r11 1430 addq $0x30,%rsp 1431 CFI_ADJUST_CFA_OFFSET -0x30 1432 pushq $0 1433 CFI_ADJUST_CFA_OFFSET 8 1434 pushq %r11 1435 CFI_ADJUST_CFA_OFFSET 8 1436 pushq %rcx 1437 CFI_ADJUST_CFA_OFFSET 8 1438 jmp general_protection 1439 CFI_RESTORE_STATE 14401: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 1441 movq (%rsp),%rcx 1442 CFI_RESTORE rcx 1443 movq 8(%rsp),%r11 1444 CFI_RESTORE r11 1445 addq $0x30,%rsp 1446 CFI_ADJUST_CFA_OFFSET -0x30 1447 pushq $0 1448 CFI_ADJUST_CFA_OFFSET 8 1449 SAVE_ALL 1450 jmp error_exit 1451 CFI_ENDPROC 1452END(xen_failsafe_callback) 1453 1454#endif /* CONFIG_XEN */ 1455

