/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
/*
 * AUDIT_ARCH_X86_64 refers to the two __AUDIT_ARCH_* macros defined just
 * below it; that is fine because cpp only expands them at the point of use.
 */
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE	   0x40000000

	.code64
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
/* With CONFIG_DYNAMIC_FTRACE, mcount itself does nothing but return. */
ENTRY(mcount)
	retq
END(mcount)

/*
 * ftrace_caller: tracing trampoline for the dynamic-ftrace configuration.
 *
 * Returns immediately (via ftrace_stub) while function_trace_stop is
 * non-zero.  Otherwise it saves registers with MCOUNT_SAVE_FRAME, loads
 *   %rdi = 0x38(%rsp) - MCOUNT_INSN_SIZE
 *   %rsi = 8(%rbp)
 * and performs the call at ftrace_call.
 * NOTE(review): %rdi/%rsi are presumably the traced function's address and
 * its caller's address; that depends on the MCOUNT_SAVE_FRAME layout, which
 * is defined elsewhere -- confirm.
 */
ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	/*
	 * NOTE(review): the call target below is presumably rewritten at
	 * run time by the dynamic ftrace code; as assembled it calls the
	 * empty ftrace_stub -- confirm against the ftrace patching code.
	 */
GLOBAL(ftrace_call)
	call ftrace_stub

	MCOUNT_RESTORE_FRAME

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
	jmp ftrace_stub
#endif

/* ftrace_stub: shared "do nothing" return used by all the paths above. */
GLOBAL(ftrace_stub)
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
/*
 * mcount (static ftrace): returns straight away unless tracing is enabled.
 *
 * Tracing counts as enabled when function_trace_stop is zero and one of the
 * hook pointers differs from its stub: ftrace_trace_function (-> trace) or,
 * with the graph tracer, ftrace_graph_return / ftrace_graph_entry
 * (-> ftrace_graph_caller).
 */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif

GLOBAL(ftrace_stub)
	retq

	/* Same argument setup as ftrace_caller, but calls through the pointer. */
trace:
	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call   *ftrace_trace_function

	MCOUNT_RESTORE_FRAME

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: calls prepare_ftrace_return() with
 *   %rdi = address of the slot at 8(%rbp)
 *   %rsi = 0x38(%rsp) - MCOUNT_INSN_SIZE
 *   %rdx = (%rbp)
 * unless function_trace_stop is non-zero.
 */
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	leaq 8(%rbp), %rdi
	movq 0x38(%rsp), %rsi
	movq (%rbp), %rdx
	subq $MCOUNT_INSN_SIZE, %rsi

	call	prepare_ftrace_return

	MCOUNT_RESTORE_FRAME

	retq
END(ftrace_graph_caller)

/*
 * return_to_handler: preserves %rax/%rdx (the return values) on the stack,
 * calls ftrace_return_to_handler() with %rbp as its argument, then jumps to
 * the address that call returned, with %rax/%rdx restored.
 */
GLOBAL(return_to_handler)
	subq  $24, %rsp

	/* Save the return values */
	movq %rax, (%rsp)
	movq %rdx, 8(%rsp)
	movq %rbp, %rdi

	call ftrace_return_to_handler

	movq %rax, %rdi		/* address to continue at */
	movq 8(%rsp), %rdx
	movq (%rsp), %rax
	addq $24, %rsp
	jmp *%rdi
#endif


/* Without CONFIG_PREEMPT, returning to kernel mode just restores the args. */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
/* Native implementation: swap to the user GS base, then SYSRET. */
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */


/*
 * TRACE_IRQS_IRETQ: with CONFIG_TRACE_IRQFLAGS, emit TRACE_IRQS_ON when
 * bit 9 of the saved EFLAGS word (at EFLAGS-\offset(%rsp)) is set.
 * Expands to nothing otherwise.
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack.
Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	/*
	 * FIXUP_TOP_OF_STACK tmp, offset
	 * Fills in the frame slots that SYSCALL left undefined: RSP from the
	 * per-cpu old_rsp, SS/CS with the user selectors, RCX with -1 and
	 * EFLAGS from the saved R11 slot.  Clobbers \tmp.
	 */
	.macro FIXUP_TOP_OF_STACK tmp offset=0
	movq PER_CPU_VAR(old_rsp),\tmp
	movq \tmp,RSP+\offset(%rsp)
	movq $__USER_DS,SS+\offset(%rsp)
	movq $__USER_CS,CS+\offset(%rsp)
	movq $-1,RCX+\offset(%rsp)
	movq R11+\offset(%rsp),\tmp  /* get eflags */
	movq \tmp,EFLAGS+\offset(%rsp)
	.endm

	/*
	 * RESTORE_TOP_OF_STACK tmp, offset
	 * Inverse direction: copies the frame's RSP back to the per-cpu
	 * old_rsp and the frame's EFLAGS back into the R11 slot.
	 * Clobbers \tmp.
	 */
	.macro RESTORE_TOP_OF_STACK tmp offset=0
	movq RSP+\offset(%rsp),\tmp
	movq \tmp,PER_CPU_VAR(old_rsp)
	movq EFLAGS+\offset(%rsp),\tmp
	movq \tmp,R11+\offset(%rsp)
	.endm

	/*
	 * FAKE_STACK_FRAME child_rip
	 * Pushes six quadwords that mimic an interrupt frame plus orig_rax:
	 * ss, rsp (0), eflags (IF set), cs, rip (\child_rip), orig_rax (0).
	 * Clobbers %rax (zeroed).
	 */
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $X86_EFLAGS_IF /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	/* Drops the six quadwords pushed by FAKE_STACK_FRAME. */
	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

/*
 * empty frame: only establishes the CFA (rsp + 8 + offset); no register
 * save locations are recorded.  With start=1 it also opens a new "simple"
 * signal-frame CFI region; with start=0 it only re-bases the CFA offset of
 * the region already open.  Building block for INTR_FRAME below.
 */
	.macro EMPTY_FRAME start=1 offset=0
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,8+\offset
	.else
	CFI_DEF_CFA_OFFSET 8+\offset
	.endif
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro INTR_FRAME start=1 offset=0
	EMPTY_FRAME \start, SS+8+\offset-RIP
	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
	CFI_REL_OFFSET rsp, RSP+\offset-RIP
	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
	CFI_REL_OFFSET rip, RIP+\offset-RIP
	.endm

/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
	.macro XCPT_FRAME start=1 offset=0
	INTR_FRAME \start, RIP+\offset-ORIG_RAX
	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
	.endm

/*
 * frame that enables calling into C.
 * (XCPT_FRAME plus the save slots of rdi, rsi, rdx, rcx, rax and r8-r11.)
 */
	.macro PARTIAL_FRAME start=1 offset=0
	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
	.endm

/*
 * frame that enables passing a complete pt_regs to a C function.
 * (PARTIAL_FRAME plus the save slots of rbx, rbp and r12-r15.)
 */
	.macro DEFAULT_FRAME start=1 offset=0
	PARTIAL_FRAME \start, R11+\offset-R15
	CFI_REL_OFFSET rbx, RBX+\offset
	CFI_REL_OFFSET rbp, RBP+\offset
	CFI_REL_OFFSET r12, R12+\offset
	CFI_REL_OFFSET r13, R13+\offset
	CFI_REL_OFFSET r14, R14+\offset
	CFI_REL_OFFSET r15, R15+\offset
	.endm

/* save partial stack frame */
/*
 * save_args: called right after the caller has reserved the register save
 * area (see the "interrupt" macro).
 *
 * - stores rdi, rsi, rdx, rcx, rax, r8-r11 into their frame slots
 * - leaves %rdi pointing at the frame (first argument for the handler)
 * - stores %rbp at 8(%rsp) and points %rbp at that slot
 * - executes SWAPGS when the saved CS has a non-zero privilege level
 * - increments the per-cpu irq_count; when the result is zero, moves the
 *   return address over to the stack at irq_stack_ptr and continues there
 */
ENTRY(save_args)
	XCPT_FRAME
	cld
	movq_cfi rdi, RDI+16-ARGOFFSET
	movq_cfi rsi, RSI+16-ARGOFFSET
	movq_cfi rdx, RDX+16-ARGOFFSET
	movq_cfi rcx, RCX+16-ARGOFFSET
	movq_cfi rax, RAX+16-ARGOFFSET
	movq_cfi  r8,  R8+16-ARGOFFSET
	movq_cfi  r9,  R9+16-ARGOFFSET
	movq_cfi r10, R10+16-ARGOFFSET
	movq_cfi r11, R11+16-ARGOFFSET

	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
	movq_cfi rbp, 8		/* push %rbp */
	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
	testl $3, CS(%rdi)		/* low two CS bits zero -> skip SWAPGS */
	je 1f
	SWAPGS
	/*
	 * irq_count is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
1:	incl PER_CPU_VAR(irq_count)
	jne 2f				/* non-zero result: keep current stack */
	popq_cfi %rax			/* move return address... */
	mov PER_CPU_VAR(irq_stack_ptr),%rsp
	EMPTY_FRAME 0
	pushq_cfi %rbp			/* backlink for unwinder */
	pushq_cfi %rax			/* ... to the new stack */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
2:	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC
END(save_args)

/*
 * save_rest: stores rbx, rbp, r12-r15 into the frame below the caller's
 * return address, relocates its own return address to 8(%rsp) and fixes up
 * the top of stack.  Clobbers %r11.  Callers reserve REST_SKIP bytes first
 * (see PTREGSCALL).
 */
ENTRY(save_rest)
	PARTIAL_FRAME 1 REST_SKIP+8
	movq 5*8+16(%rsp), %r11	/* save return address */
	movq_cfi rbx, RBX+16
	movq_cfi rbp, RBP+16
	movq_cfi r12, R12+16
	movq_cfi r13, R13+16
	movq_cfi r14, R14+16
	movq_cfi r15, R15+16
	movq %r11, 8(%rsp)	/* return address */
	FIXUP_TOP_OF_STACK %r11, 16
	ret
	CFI_ENDPROC
END(save_rest)

/* save complete stack frame */
/*
 * save_paranoid: stores all 15 general registers into the frame above the
 * return address, then decides about SWAPGS by reading MSR_GS_BASE rather
 * than by looking at the saved CS.
 *
 * Out: %ebx = 1 if no SWAPGS was executed (upper half of the MSR negative),
 *      %ebx = 0 if SWAPGS was executed.
 * Clobbers %eax, %ecx, %edx (rdmsr) after they have been saved.
 */
	.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
	XCPT_FRAME 1 RDI+8
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi r8, R8+8
	movq_cfi r9, R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f	/* negative -> in kernel */
	SWAPGS
	xorl %ebx,%ebx
1:	ret
	CFI_ENDPROC
END(save_paranoid)
	.popsection

/*
 * A newly forked process directly context switches into this address.
 *
 * rdi: prev task we switched from
 */
ENTRY(ret_from_fork)
	DEFAULT_FRAME

	/*
	 * NOTE(review): %r8 is used as the thread_info pointer here but is
	 * not set up in this file -- presumably left there by the context
	 * switch code; confirm.
	 */
	LOCK ; btr $TIF_FORK,TI_flags(%r8)

	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf					# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8

	call schedule_tail			# rdi: 'prev' task parameter

	GET_THREAD_INFO(%rcx)

	RESTORE_REST

	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
	je   int_ret_from_sys_call

	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
	jnz  int_ret_from_sys_call

	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
	jmp ret_from_sys_call			# go to the SYSRET fastpath

	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *      and report it properly in ps. Unfortunately we haven't.
 *
 * When user can change the frames always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	/* Park the user stack pointer per-cpu and switch to the kernel stack. */
	movq	%rsp,PER_CPU_VAR(old_rsp)
	movq	PER_CPU_VAR(kernel_stack),%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys			/* unsigned compare: out-of-range number */
	movq %r10,%rcx			/* arg3: r10 -> rcx for the C ABI */
	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	PER_CPU_VAR(old_rsp), %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi			/* keep the work mask across schedule() */
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/*
	 * We have a signal, or exit tracing or single-step.
	 * These all wind up with the iret return path anyway,
	 * so just join that path right now.
	 */
	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
	jmp int_check_syscall_exit_work

	/* Out-of-range syscall number: report -ENOSYS via the fast return path. */
badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0		/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit.  Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq $0,%rsi		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%edi		/* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif	/* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	/* Only the audit flag set among the entry-work flags: cheaper path. */
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
GLOBAL(int_ret_from_sys_call)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
GLOBAL(int_with_check)
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl    $~TS_COMPAT,TI_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi			/* keep the work mask across schedule() */
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	/* Exit tracing has run: drop those flags from the mask being checked. */
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls that need to save a complete full stack frame.
 */
	/*
	 * PTREGSCALL label, func, arg
	 * Defines entry point \label, which reserves REST_SKIP bytes, saves
	 * the remaining registers with save_rest, passes the pt_regs pointer
	 * (8(%rsp), just above the return address) to \func in register
	 * \arg, and finishes through ptregscall_common.
	 */
	.macro PTREGSCALL label,func,arg
ENTRY(\label)
	PARTIAL_FRAME 1 8		/* offset 8: return address */
	subq $REST_SKIP, %rsp
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	call save_rest
	DEFAULT_FRAME 0 8		/* offset 8: return address */
	leaq 8(%rsp), \arg	/* pt_regs pointer */
	call \func
	jmp ptregscall_common
	CFI_ENDPROC
END(\label)
	.endm

	/* The register given last is the one that receives the pt_regs pointer. */
	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

/*
 * ptregscall_common: shared tail of the PTREGSCALL stubs.  Syncs the top of
 * stack back, reloads r15-r12, rbp and rbx from the frame and returns while
 * popping the REST_SKIP bytes the stub reserved.  Clobbers %r11.
 */
ENTRY(ptregscall_common)
	DEFAULT_FRAME 1 8	/* offset 8: return address */
	RESTORE_TOP_OF_STACK %r11, 8
	movq_cfi_restore R15+8, r15
	movq_cfi_restore R14+8, r14
	movq_cfi_restore R13+8, r13
	movq_cfi_restore R12+8, r12
	movq_cfi_restore RBP+8, rbp
	movq_cfi_restore RBX+8, rbx
	ret $REST_SKIP		/* pop extended registers */
	CFI_ENDPROC
END(ptregscall_common)

/*
 * stub_execve: discards its own return address (popped into %r11), builds a
 * full frame, calls sys_execve() with the pt_regs pointer in %rcx and leaves
 * through the IRET path (int_ret_from_sys_call).
 */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp			/* drop our own return address */
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi			/* pt_regs pointer -> arg1 */
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 *
 * Layout produced below:
 * - "interrupt" (in .init.rodata) is a table of quadword pointers, one per
 *   vector from FIRST_EXTERNAL_VECTOR up to NR_VECTORS-1, each pointing at
 *   that vector's stub.
 * - "irq_entries_start" (in .text) holds the stubs.  Each stub pushes
 *   (~vector+0x80); the first six of every group jump to the group's shared
 *   "2:" label, the seventh falls through to it, and "2:" jumps to
 *   common_interrupt.
 */
	.section .init.rodata,"a"
ENTRY(interrupt)
	.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -8
      .endif
1:	pushq $(~vector+0x80)	/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 8
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.quad 1b
      .text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
	CFI_ENDPROC
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): ~(interrupt number) */
	/*
	 * interrupt func
	 * Reserves 10 quadwords, lets save_args fill them in (it also sets
	 * %rdi to the frame and may switch stacks), then calls \func.
	 */
	.macro interrupt func
	subq $10*8, %rsp
	CFI_ADJUST_CFA_OFFSET 10*8
	call save_args
	PARTIAL_FRAME 0
	call \func
	.endm

/*
 * Interrupt entry/exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
	/*
	 * The interrupt stubs push (~vector+0x80) onto the stack and
	 * then jump to common_interrupt.
	 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	XCPT_FRAME
	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
	interrupt do_IRQ
	/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl PER_CPU_VAR(irq_count)
	leaveq				/* back to the %rbp set up in save_args */
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	/* A fault at irq_return is redirected to bad_iret. */
	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0			/* fake error code for general_protection */

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi			/* keep the work mask across schedule() */
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * End of kprobes section
 */
	.popsection

/*
 * APIC interrupts.
 */
/*
 * apicinterrupt num sym do_sym
 * Defines entry point \sym: pushes ~\num (the same "~(interrupt number)"
 * convention the "interrupt" macro expects at 0(%rsp)), runs \do_sym through
 * the "interrupt" macro and leaves via ret_from_intr.
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \do_sym
	jmp ret_from_intr
	CFI_ENDPROC
END(\sym)
.endm

/* Each invocation below is: vector, entry symbol, C handler. */
#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt REBOOT_VECTOR \
	reboot_interrupt smp_reboot_interrupt
#endif

#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
	uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
	apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR \
	x86_platform_ipi smp_x86_platform_ipi

#ifdef CONFIG_SMP
/* Eight TLB-invalidate vectors, all served by smp_invalidate_interrupt. */
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
	invalidate_interrupt0 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
	invalidate_interrupt1 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
	invalidate_interrupt2 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
	invalidate_interrupt3 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
	invalidate_interrupt4 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
	invalidate_interrupt5 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
	invalidate_interrupt6 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
	invalidate_interrupt7 smp_invalidate_interrupt
#endif

/*
 * More entry points generated with the apicinterrupt macro
 * (arguments: vector, entry symbol, C handler).
 */
apicinterrupt THRESHOLD_APIC_VECTOR \
	threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
	thermal_interrupt smp_thermal_interrupt

#ifdef CONFIG_X86_MCE
apicinterrupt MCE_SELF_VECTOR \
	mce_self_interrupt smp_mce_self_interrupt
#endif

#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
	call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
	call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
	reschedule_interrupt smp_reschedule_interrupt
#endif

apicinterrupt ERROR_APIC_VECTOR \
	error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
	spurious_interrupt smp_spurious_interrupt

#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
	perf_pending_interrupt smp_perf_pending_interrupt
#endif

/*
 * Exception entry points.
1033 */ 1034.macro zeroentry sym do_sym 1035ENTRY(\sym) 1036 INTR_FRAME 1037 PARAVIRT_ADJUST_EXCEPTION_FRAME 1038 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1039 subq $15*8,%rsp 1040 CFI_ADJUST_CFA_OFFSET 15*8 1041 call error_entry 1042 DEFAULT_FRAME 0 1043 movq %rsp,%rdi /* pt_regs pointer */ 1044 xorl %esi,%esi /* no error code */ 1045 call \do_sym 1046 jmp error_exit /* %ebx: no swapgs flag */ 1047 CFI_ENDPROC 1048END(\sym) 1049.endm 1050 1051.macro paranoidzeroentry sym do_sym 1052ENTRY(\sym) 1053 INTR_FRAME 1054 PARAVIRT_ADJUST_EXCEPTION_FRAME 1055 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1056 CFI_ADJUST_CFA_OFFSET 8 1057 subq $15*8, %rsp 1058 call save_paranoid 1059 TRACE_IRQS_OFF 1060 movq %rsp,%rdi /* pt_regs pointer */ 1061 xorl %esi,%esi /* no error code */ 1062 call \do_sym 1063 jmp paranoid_exit /* %ebx: no swapgs flag */ 1064 CFI_ENDPROC 1065END(\sym) 1066.endm 1067 1068.macro paranoidzeroentry_ist sym do_sym ist 1069ENTRY(\sym) 1070 INTR_FRAME 1071 PARAVIRT_ADJUST_EXCEPTION_FRAME 1072 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1073 CFI_ADJUST_CFA_OFFSET 8 1074 subq $15*8, %rsp 1075 call save_paranoid 1076 TRACE_IRQS_OFF 1077 movq %rsp,%rdi /* pt_regs pointer */ 1078 xorl %esi,%esi /* no error code */ 1079 PER_CPU(init_tss, %r12) 1080 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) 1081 call \do_sym 1082 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) 1083 jmp paranoid_exit /* %ebx: no swapgs flag */ 1084 CFI_ENDPROC 1085END(\sym) 1086.endm 1087 1088.macro errorentry sym do_sym 1089ENTRY(\sym) 1090 XCPT_FRAME 1091 PARAVIRT_ADJUST_EXCEPTION_FRAME 1092 subq $15*8,%rsp 1093 CFI_ADJUST_CFA_OFFSET 15*8 1094 call error_entry 1095 DEFAULT_FRAME 0 1096 movq %rsp,%rdi /* pt_regs pointer */ 1097 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1098 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 1099 call \do_sym 1100 jmp error_exit /* %ebx: no swapgs flag */ 1101 CFI_ENDPROC 1102END(\sym) 1103.endm 1104 1105 /* error code is on 
the stack already */
/*
 * paranoiderrorentry sym do_sym
 * Same as errorentry, but saves registers with save_paranoid and leaves
 * through paranoid_exit.
 */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call save_paranoid
	DEFAULT_FRAME 0
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

/* Exception entry points: macro, entry symbol, C handler. */
zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error

	/* Reload gs selector with exception handling */
	/* edi:  new selector */
	/*
	 * Runs with interrupts disabled between the two SWAPGS; the caller's
	 * flags are saved with pushf and restored with popf.  A fault on the
	 * %gs load at gs_change is redirected to bad_gs by the __ex_table
	 * entry below.
	 */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
END(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
	/* Fixup: load a null selector into %gs instead, then resume at 2: above. */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

/*
 * kernel_thread_helper: calls the function whose address is in %rsi, then
 * passes its 32-bit return value to do_exit().  The code after the do_exit
 * call is not expected to run (ud2).
 */
ENTRY(kernel_thread_helper)
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	call *%rsi
	# exit
	mov %eax, %edi
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
END(kernel_thread_helper)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call	/* result 0: leave through the IRET path */
	/* Non-zero result: undo the fake frame and return it to the caller. */
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
END(kernel_execve)

/* Call softirq on interrupt stack. Interrupts are off. */
/*
 * The stack is switched to irq_stack_ptr only when the incremented
 * irq_count is zero (cmove); leaveq returns to the original stack through
 * the %rbp saved on entry.
 */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl PER_CPU_VAR(irq_count)
	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET   -8
	decl PER_CPU_VAR(irq_count)
	ret
	CFI_ENDPROC
END(call_softirq)

#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing.
HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
/*
 * xen_do_hypervisor_callback(struct pt_regs *)
 *
 * In:	%rdi = pointer to the saved registers
 *
 * Never returns to its caller: %rsp is reset to the register frame and
 * the function leaves through error_exit.  xen_evtchn_do_upcall is run
 * with irq_count incremented; when that increment yields zero, %rsp is
 * switched to irq_stack_ptr for the duration of the call (the old %rsp
 * is pushed there and popped back afterwards).
 */
ENTRY(xen_do_hypervisor_callback)
	CFI_STARTPROC
/*
 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
 * see the correct pointer to the pt_regs
 */
	movq %rdi, %rsp			/* we don't return, adjust the stack frame */
	CFI_ENDPROC
	DEFAULT_FRAME
11:	incl PER_CPU_VAR(irq_count)
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
	pushq %rbp			/* backlink for old unwinder */
	call xen_evtchn_do_upcall
	popq %rsp			/* back to the stack saved above */
	CFI_DEF_CFA_REGISTER rsp
	decl PER_CPU_VAR(irq_count)
	jmp  error_exit
	CFI_ENDPROC
	/* END() must name the symbol opened by ENTRY() above. */
END(xen_do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
*/
/*
 * Stack on entry, as used by the code below:
 *	0x00(%rsp)  saved %rcx
 *	0x08(%rsp)  saved %r11
 *	0x10(%rsp)  saved %ds
 *	0x18(%rsp)  saved %es
 *	0x20(%rsp)  saved %fs
 *	0x28(%rsp)  saved %gs
 * Both outcomes reload %rcx/%r11 and drop these 0x30 bytes.
 */
ENTRY(xen_failsafe_callback)
	INTR_FRAME 1 (6*8)
	/*CFI_REL_OFFSET gs,GS*/
	/*CFI_REL_OFFSET fs,FS*/
	/*CFI_REL_OFFSET es,ES*/
	/*CFI_REL_OFFSET ds,DS*/
	CFI_REL_OFFSET r11,8
	CFI_REL_OFFSET rcx,0

	/* Compare each live segment selector with its saved copy. */
	movw	%ds,%cx
	cmpw	%cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne	1f
	movw	%es,%cx
	cmpw	%cx,0x18(%rsp)
	jne	1f
	movw	%fs,%cx
	cmpw	%cx,0x20(%rsp)
	jne	1f
	movw	%gs,%cx
	cmpw	%cx,0x28(%rsp)
	jne	1f

	/*
	 * All segments match their saved values => Category 2 (Bad IRET).
	 * Push 0 (RIP slot), %r11 and %rcx and continue in
	 * general_protection.
	 */
	movq	(%rsp),%rcx
	CFI_RESTORE rcx
	movq	8(%rsp),%r11
	CFI_RESTORE r11
	addq	$0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0			/* RIP */
	pushq_cfi %r11
	pushq_cfi %rcx
	jmp	general_protection
	CFI_RESTORE_STATE

1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq	(%rsp),%rcx
	CFI_RESTORE rcx
	movq	8(%rsp),%r11
	CFI_RESTORE r11
	addq	$0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0
	SAVE_ALL
	jmp	error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

#endif /* CONFIG_XEN */

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

/* Exception entry points: <macro> <entry symbol> <C handler> [IST index] */
paranoidzeroentry_ist	debug			do_debug	DEBUG_STACK
paranoidzeroentry_ist	int3			do_int3		DEBUG_STACK
paranoiderrorentry	stack_segment		do_stack_segment
#ifdef CONFIG_XEN
zeroentry		xen_debug		do_debug
zeroentry		xen_int3		do_int3
errorentry		xen_stack_segment	do_stack_segment
#endif
errorentry		general_protection	do_general_protection
errorentry		page_fault		do_page_fault
#ifdef CONFIG_X86_MCE
/* The handler is called indirectly through the machine_check_vector pointer. */
paranoidzeroentry	machine_check		*machine_check_vector(%rip)
#endif

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
* We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 * NOTE(review): this entry point takes no "trace" parameter; the
	 * nmi handler instead carries its own copy of this path without
	 * the TRACE_IRQS_* calls when CONFIG_TRACE_IRQFLAGS is set.
	 *
	 * In:	%ebx = no swapgs flag (non-zero: return without SWAPGS)
	 *	%rsp = complete register frame; it is read through CS(%rsp)
	 *	       and popped with RESTORE_ALL 8.
	 *
	 * This label is reached by jmp with that full frame already on
	 * the stack, so the unwind annotation is DEFAULT_FRAME (as on
	 * error_exit), not the bare hardware-frame INTR_FRAME.  The
	 * annotation emits no instructions.
	 *
	 * Paths:
	 *  - %ebx != 0			-> paranoid_restore (no SWAPGS)
	 *  - CS has no RPL bits set	-> paranoid_swapgs
	 *  - CS has RPL bits set	-> paranoid_userspace: loop while
	 *    any _TIF_WORK_MASK flag is set, calling schedule or
	 *    do_notify_resume on the stack returned by sync_regs, then
	 *    leave through paranoid_swapgs.
	 */

	/* ebx:	no swapgs flag */
ENTRY(paranoid_exit)
	DEFAULT_FRAME
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
	RESTORE_ALL 8
	jmp irq_return
paranoid_restore:
	TRACE_IRQS_IRETQ 0
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx	/* %ebx now holds the pending work bits */
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp paranoid_userspace		/* re-check the flags */
paranoid_schedule:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	jmp paranoid_userspace		/* re-check the flags */
	CFI_ENDPROC
END(paranoid_exit)

/*
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * Returns the "no swapgs flag" in %ebx.
*/
/*
 * error_entry stores every general-purpose register into its frame slot.
 * All slot offsets carry +8.
 * NOTE(review): presumably the +8 skips the return address pushed by the
 * "call error_entry" in the entry macros -- confirm against the callers.
 *
 * Out:	%ebx = 0 when the saved CS has RPL bits set (SWAPGS was done),
 *	       1 when the exception came from kernel mode.
 */
ENTRY(error_entry)
	XCPT_FRAME
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi  r8,  R8+8
	movq_cfi  r9,  R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	xorl	%ebx,%ebx		/* flag = 0: swapgs on exit */
	testl	$3,CS+8(%rsp)
	jz	error_kernelspace	/* no RPL bits: came from kernel */
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 *
 * The two places are irq_return and gs_change; the saved RIP is
 * compared against each.  The truncated case is detected by comparing
 * against the zero-extended low 32 bits of irq_return's address.
 */
error_kernelspace:
	incl	%ebx			/* flag = 1: no swapgs on exit */
	leaq	irq_return(%rip),%rcx
	cmpq	%rcx,RIP+8(%rsp)
	je	error_swapgs
	movl	%ecx,%eax		/* zero extend */
	cmpq	%rax,RIP+8(%rsp)
	je	bstep_iret
	cmpq	$gs_change,RIP+8(%rsp)
	je	error_swapgs
	jmp	error_sti

bstep_iret:
	/* Fix truncated RIP */
	movq	%rcx,RIP+8(%rsp)
	jmp	error_swapgs
END(error_entry)


/*
 * error_exit - common exit path for the error_entry based stubs.
 *
 * In:	%ebx = no swapgs flag (1: don't need swapgs, 0: need it)
 *
 * The flag is copied to %eax before RESTORE_REST.  A non-zero flag
 * leaves through retint_kernel.  Otherwise the thread flags are masked
 * with _TIF_WORK_MASK: pending work goes to retint_careful (masked
 * flags in %edx, mask in %edi), no work goes to retint_swapgs.
 */
ENTRY(error_exit)
	DEFAULT_FRAME
	movl	%ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jnz	retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl	TI_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	jmp	retint_swapgs
	CFI_ENDPROC
END(error_exit)


/*
 * nmi - NMI entry point; runs on exception stack.
 *
 * Saves the registers with save_paranoid and calls do_nmi with
 * %rdi = saved-register pointer and %rsi = -1.  With
 * CONFIG_TRACE_IRQFLAGS the exit sequence below is a private copy of
 * paranoid_exit without the TRACE_IRQS_* calls; otherwise the handler
 * simply jumps to paranoid_exit.
 */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	subq	$15*8, %rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call	save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq	%rsp,%rdi
	movq	$-1,%rsi
	call	do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx:	no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl	%ebx,%ebx		/* swapgs needed? */
	jnz	nmi_restore
	testl	$3,CS(%rsp)
	jnz	nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	jmp	irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl	TI_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	nmi_swapgs
	movq	%rsp,%rdi		/* &pt_regs */
	call	sync_regs
	movq	%rax,%rsp		/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	nmi_schedule
	movl	%ebx,%edx		/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl	%esi,%esi		/* arg2: oldset */
	movq	%rsp,%rdi		/* arg1: &pt_regs */
	call	do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp	nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call	schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp	nmi_userspace
	CFI_ENDPROC
#else
	jmp	paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

/* ignore_sysret - load -ENOSYS into %eax and return with sysret. */
ENTRY(ignore_sysret)
	CFI_STARTPROC
	movl	$-ENOSYS,%eax
	sysret
	CFI_ENDPROC
END(ignore_sysret)

/*
 * End of kprobes section
 */
	.popsection

