1/* 2 * 3 * Copyright (C) 1991, 1992 Linus Torvalds 4 */ 5 6/* 7 * entry.S contains the system-call and fault low-level handling routines. 8 * This also contains the timer-interrupt handler, as well as all interrupts 9 * and faults that can result in a task-switch. 10 * 11 * NOTE: This code handles signal-recognition, which happens every time 12 * after a timer-interrupt and after each system call. 13 * 14 * I changed all the .align's to 4 (16 byte alignment), as that's faster 15 * on a 486. 16 * 17 * Stack layout in 'syscall_exit': 18 * ptrace needs to have all regs on the stack. 19 * if the order here is changed, it needs to be 20 * updated in fork.c:copy_process, signal.c:do_signal, 21 * ptrace.c and ptrace.h 22 * 23 * 0(%esp) - %ebx 24 * 4(%esp) - %ecx 25 * 8(%esp) - %edx 26 * C(%esp) - %esi 27 * 10(%esp) - %edi 28 * 14(%esp) - %ebp 29 * 18(%esp) - %eax 30 * 1C(%esp) - %ds 31 * 20(%esp) - %es 32 * 24(%esp) - %fs 33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS 34 * 2C(%esp) - orig_eax 35 * 30(%esp) - %eip 36 * 34(%esp) - %cs 37 * 38(%esp) - %eflags 38 * 3C(%esp) - %oldesp 39 * 40(%esp) - %oldss 40 * 41 * "current" is in register %ebx during any slow entries. 42 */ 43 44#include <linux/linkage.h> 45#include <asm/thread_info.h> 46#include <asm/irqflags.h> 47#include <asm/errno.h> 48#include <asm/segment.h> 49#include <asm/smp.h> 50#include <asm/page_types.h> 51#include <asm/percpu.h> 52#include <asm/dwarf2.h> 53#include <asm/processor-flags.h> 54#include <asm/ftrace.h> 55#include <asm/irq_vectors.h> 56 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 58#include <linux/elf-em.h> 59#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) 60#define __AUDIT_ARCH_LE 0x40000000 61 62#ifndef CONFIG_AUDITSYSCALL 63#define sysenter_audit syscall_trace_entry 64#define sysexit_audit syscall_exit_work 65#endif 66 67/* 68 * We use macros for low-level operations which need to be overridden 69 * for paravirtualization. 
The following will never clobber any registers: 70 * INTERRUPT_RETURN (aka. "iret") 71 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") 72 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). 73 * 74 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must 75 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). 76 * Allowing a register to be clobbered can shrink the paravirt replacement 77 * enough to patch inline, increasing performance. 78 */ 79 80#define nr_syscalls ((syscall_table_size)/4) 81 82#ifdef CONFIG_PREEMPT 83#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 84#else 85#define preempt_stop(clobbers) 86#define resume_kernel restore_all 87#endif 88 89.macro TRACE_IRQS_IRET 90#ifdef CONFIG_TRACE_IRQFLAGS 91 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? 92 jz 1f 93 TRACE_IRQS_ON 941: 95#endif 96.endm 97 98#ifdef CONFIG_VM86 99#define resume_userspace_sig check_userspace 100#else 101#define resume_userspace_sig resume_userspace 102#endif 103 104/* 105 * User gs save/restore 106 * 107 * %gs is used for userland TLS and kernel only uses it for stack 108 * canary which is required to be at %gs:20 by gcc. Read the comment 109 * at the top of stackprotector.h for more info. 110 * 111 * Local labels 98 and 99 are used. 
112 */ 113#ifdef CONFIG_X86_32_LAZY_GS 114 115 /* unfortunately push/pop can't be no-op */ 116.macro PUSH_GS 117 pushl $0 118 CFI_ADJUST_CFA_OFFSET 4 119.endm 120.macro POP_GS pop=0 121 addl $(4 + \pop), %esp 122 CFI_ADJUST_CFA_OFFSET -(4 + \pop) 123.endm 124.macro POP_GS_EX 125.endm 126 127 /* all the rest are no-op */ 128.macro PTGS_TO_GS 129.endm 130.macro PTGS_TO_GS_EX 131.endm 132.macro GS_TO_REG reg 133.endm 134.macro REG_TO_PTGS reg 135.endm 136.macro SET_KERNEL_GS reg 137.endm 138 139#else /* CONFIG_X86_32_LAZY_GS */ 140 141.macro PUSH_GS 142 pushl %gs 143 CFI_ADJUST_CFA_OFFSET 4 144 /*CFI_REL_OFFSET gs, 0*/ 145.endm 146 147.macro POP_GS pop=0 14898: popl %gs 149 CFI_ADJUST_CFA_OFFSET -4 150 /*CFI_RESTORE gs*/ 151 .if \pop <> 0 152 add $\pop, %esp 153 CFI_ADJUST_CFA_OFFSET -\pop 154 .endif 155.endm 156.macro POP_GS_EX 157.pushsection .fixup, "ax" 15899: movl $0, (%esp) 159 jmp 98b 160.section __ex_table, "a" 161 .align 4 162 .long 98b, 99b 163.popsection 164.endm 165 166.macro PTGS_TO_GS 16798: mov PT_GS(%esp), %gs 168.endm 169.macro PTGS_TO_GS_EX 170.pushsection .fixup, "ax" 17199: movl $0, PT_GS(%esp) 172 jmp 98b 173.section __ex_table, "a" 174 .align 4 175 .long 98b, 99b 176.popsection 177.endm 178 179.macro GS_TO_REG reg 180 movl %gs, \reg 181 /*CFI_REGISTER gs, \reg*/ 182.endm 183.macro REG_TO_PTGS reg 184 movl \reg, PT_GS(%esp) 185 /*CFI_REL_OFFSET gs, PT_GS*/ 186.endm 187.macro SET_KERNEL_GS reg 188 movl $(__KERNEL_STACK_CANARY), \reg 189 movl \reg, %gs 190.endm 191 192#endif /* CONFIG_X86_32_LAZY_GS */ 193 194.macro SAVE_ALL 195 cld 196 PUSH_GS 197 pushl %fs 198 CFI_ADJUST_CFA_OFFSET 4 199 /*CFI_REL_OFFSET fs, 0;*/ 200 pushl %es 201 CFI_ADJUST_CFA_OFFSET 4 202 /*CFI_REL_OFFSET es, 0;*/ 203 pushl %ds 204 CFI_ADJUST_CFA_OFFSET 4 205 /*CFI_REL_OFFSET ds, 0;*/ 206 pushl %eax 207 CFI_ADJUST_CFA_OFFSET 4 208 CFI_REL_OFFSET eax, 0 209 pushl %ebp 210 CFI_ADJUST_CFA_OFFSET 4 211 CFI_REL_OFFSET ebp, 0 212 pushl %edi 213 CFI_ADJUST_CFA_OFFSET 4 214 
CFI_REL_OFFSET edi, 0 215 pushl %esi 216 CFI_ADJUST_CFA_OFFSET 4 217 CFI_REL_OFFSET esi, 0 218 pushl %edx 219 CFI_ADJUST_CFA_OFFSET 4 220 CFI_REL_OFFSET edx, 0 221 pushl %ecx 222 CFI_ADJUST_CFA_OFFSET 4 223 CFI_REL_OFFSET ecx, 0 224 pushl %ebx 225 CFI_ADJUST_CFA_OFFSET 4 226 CFI_REL_OFFSET ebx, 0 227 movl $(__USER_DS), %edx 228 movl %edx, %ds 229 movl %edx, %es 230 movl $(__KERNEL_PERCPU), %edx 231 movl %edx, %fs 232 SET_KERNEL_GS %edx 233.endm 234 235.macro RESTORE_INT_REGS 236 popl %ebx 237 CFI_ADJUST_CFA_OFFSET -4 238 CFI_RESTORE ebx 239 popl %ecx 240 CFI_ADJUST_CFA_OFFSET -4 241 CFI_RESTORE ecx 242 popl %edx 243 CFI_ADJUST_CFA_OFFSET -4 244 CFI_RESTORE edx 245 popl %esi 246 CFI_ADJUST_CFA_OFFSET -4 247 CFI_RESTORE esi 248 popl %edi 249 CFI_ADJUST_CFA_OFFSET -4 250 CFI_RESTORE edi 251 popl %ebp 252 CFI_ADJUST_CFA_OFFSET -4 253 CFI_RESTORE ebp 254 popl %eax 255 CFI_ADJUST_CFA_OFFSET -4 256 CFI_RESTORE eax 257.endm 258 259.macro RESTORE_REGS pop=0 260 RESTORE_INT_REGS 2611: popl %ds 262 CFI_ADJUST_CFA_OFFSET -4 263 /*CFI_RESTORE ds;*/ 2642: popl %es 265 CFI_ADJUST_CFA_OFFSET -4 266 /*CFI_RESTORE es;*/ 2673: popl %fs 268 CFI_ADJUST_CFA_OFFSET -4 269 /*CFI_RESTORE fs;*/ 270 POP_GS \pop 271.pushsection .fixup, "ax" 2724: movl $0, (%esp) 273 jmp 1b 2745: movl $0, (%esp) 275 jmp 2b 2766: movl $0, (%esp) 277 jmp 3b 278.section __ex_table, "a" 279 .align 4 280 .long 1b, 4b 281 .long 2b, 5b 282 .long 3b, 6b 283.popsection 284 POP_GS_EX 285.endm 286 287.macro RING0_INT_FRAME 288 CFI_STARTPROC simple 289 CFI_SIGNAL_FRAME 290 CFI_DEF_CFA esp, 3*4 291 /*CFI_OFFSET cs, -2*4;*/ 292 CFI_OFFSET eip, -3*4 293.endm 294 295.macro RING0_EC_FRAME 296 CFI_STARTPROC simple 297 CFI_SIGNAL_FRAME 298 CFI_DEF_CFA esp, 4*4 299 /*CFI_OFFSET cs, -2*4;*/ 300 CFI_OFFSET eip, -3*4 301.endm 302 303.macro RING0_PTREGS_FRAME 304 CFI_STARTPROC simple 305 CFI_SIGNAL_FRAME 306 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX 307 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ 308 CFI_OFFSET eip, PT_EIP-PT_OLDESP 309 /*CFI_OFFSET 
es, PT_ES-PT_OLDESP;*/ 310 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ 311 CFI_OFFSET eax, PT_EAX-PT_OLDESP 312 CFI_OFFSET ebp, PT_EBP-PT_OLDESP 313 CFI_OFFSET edi, PT_EDI-PT_OLDESP 314 CFI_OFFSET esi, PT_ESI-PT_OLDESP 315 CFI_OFFSET edx, PT_EDX-PT_OLDESP 316 CFI_OFFSET ecx, PT_ECX-PT_OLDESP 317 CFI_OFFSET ebx, PT_EBX-PT_OLDESP 318.endm 319 320ENTRY(ret_from_fork) 321 CFI_STARTPROC 322 pushl %eax 323 CFI_ADJUST_CFA_OFFSET 4 324 call schedule_tail 325 GET_THREAD_INFO(%ebp) 326 popl %eax 327 CFI_ADJUST_CFA_OFFSET -4 328 pushl $0x0202 # Reset kernel eflags 329 CFI_ADJUST_CFA_OFFSET 4 330 popfl 331 CFI_ADJUST_CFA_OFFSET -4 332 jmp syscall_exit 333 CFI_ENDPROC 334END(ret_from_fork) 335 336/* 337 * Interrupt exit functions should be protected against kprobes 338 */ 339 .pushsection .kprobes.text, "ax" 340/* 341 * Return to user mode is not as complex as all this looks, 342 * but we want the default path for a system call return to 343 * go as quickly as possible which is why some of this is 344 * less clear than it otherwise should be. 345 */ 346 347 # userspace resumption stub bypassing syscall exit tracing 348 ALIGN 349 RING0_PTREGS_FRAME 350ret_from_exception: 351 preempt_stop(CLBR_ANY) 352ret_from_intr: 353 GET_THREAD_INFO(%ebp) 354check_userspace: 355 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS 356 movb PT_CS(%esp), %al 357 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax 358 cmpl $USER_RPL, %eax 359 jb resume_kernel # not returning to v8086 or userspace 360 361ENTRY(resume_userspace) 362 LOCKDEP_SYS_EXIT 363 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt 364 # setting need_resched or sigpending 365 # between sampling and the iret 366 TRACE_IRQS_OFF 367 movl TI_flags(%ebp), %ecx 368 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on 369 # int/exception return? 
370 jne work_pending 371 jmp restore_all 372END(ret_from_exception) 373 374#ifdef CONFIG_PREEMPT 375ENTRY(resume_kernel) 376 DISABLE_INTERRUPTS(CLBR_ANY) 377 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 378 jnz restore_all 379need_resched: 380 movl TI_flags(%ebp), %ecx # need_resched set ? 381 testb $_TIF_NEED_RESCHED, %cl 382 jz restore_all 383 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? 384 jz restore_all 385 call preempt_schedule_irq 386 jmp need_resched 387END(resume_kernel) 388#endif 389 CFI_ENDPROC 390/* 391 * End of kprobes section 392 */ 393 .popsection 394 395/* SYSENTER_RETURN points to after the "sysenter" instruction in 396 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 397 398 # sysenter call handler stub 399ENTRY(ia32_sysenter_target) 400 CFI_STARTPROC simple 401 CFI_SIGNAL_FRAME 402 CFI_DEF_CFA esp, 0 403 CFI_REGISTER esp, ebp 404 movl TSS_sysenter_sp0(%esp),%esp 405sysenter_past_esp: 406 /* 407 * Interrupts are disabled here, but we can't trace it until 408 * enough kernel state to call TRACE_IRQS_OFF can be called - but 409 * we immediately enable interrupts at that point anyway. 410 */ 411 pushl $(__USER_DS) 412 CFI_ADJUST_CFA_OFFSET 4 413 /*CFI_REL_OFFSET ss, 0*/ 414 pushl %ebp 415 CFI_ADJUST_CFA_OFFSET 4 416 CFI_REL_OFFSET esp, 0 417 pushfl 418 orl $X86_EFLAGS_IF, (%esp) 419 CFI_ADJUST_CFA_OFFSET 4 420 pushl $(__USER_CS) 421 CFI_ADJUST_CFA_OFFSET 4 422 /*CFI_REL_OFFSET cs, 0*/ 423 /* 424 * Push current_thread_info()->sysenter_return to the stack. 425 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 426 * pushed above; +8 corresponds to copy_thread's esp0 setting. 427 */ 428 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 429 CFI_ADJUST_CFA_OFFSET 4 430 CFI_REL_OFFSET eip, 0 431 432 pushl %eax 433 CFI_ADJUST_CFA_OFFSET 4 434 SAVE_ALL 435 ENABLE_INTERRUPTS(CLBR_NONE) 436 437/* 438 * Load the potential sixth argument from user stack. 
439 * Careful about security. 440 */ 441 cmpl $__PAGE_OFFSET-3,%ebp 442 jae syscall_fault 4431: movl (%ebp),%ebp 444 movl %ebp,PT_EBP(%esp) 445.section __ex_table,"a" 446 .align 4 447 .long 1b,syscall_fault 448.previous 449 450 GET_THREAD_INFO(%ebp) 451 452 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) 453 jnz sysenter_audit 454sysenter_do_call: 455 cmpl $(nr_syscalls), %eax 456 jae syscall_badsys 457 call *sys_call_table(,%eax,4) 458 movl %eax,PT_EAX(%esp) 459 LOCKDEP_SYS_EXIT 460 DISABLE_INTERRUPTS(CLBR_ANY) 461 TRACE_IRQS_OFF 462 movl TI_flags(%ebp), %ecx 463 testl $_TIF_ALLWORK_MASK, %ecx 464 jne sysexit_audit 465sysenter_exit: 466/* if something modifies registers it must also disable sysexit */ 467 movl PT_EIP(%esp), %edx 468 movl PT_OLDESP(%esp), %ecx 469 xorl %ebp,%ebp 470 TRACE_IRQS_ON 4711: mov PT_FS(%esp), %fs 472 PTGS_TO_GS 473 ENABLE_INTERRUPTS_SYSEXIT 474 475#ifdef CONFIG_AUDITSYSCALL 476sysenter_audit: 477 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 478 jnz syscall_trace_entry 479 addl $4,%esp 480 CFI_ADJUST_CFA_OFFSET -4 481 /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ 482 /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ 483 /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ 484 movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ 485 movl %eax,%edx /* 2nd arg: syscall number */ 486 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ 487 call audit_syscall_entry 488 pushl %ebx 489 CFI_ADJUST_CFA_OFFSET 4 490 movl PT_EAX(%esp),%eax /* reload syscall number */ 491 jmp sysenter_do_call 492 493sysexit_audit: 494 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx 495 jne syscall_exit_work 496 TRACE_IRQS_ON 497 ENABLE_INTERRUPTS(CLBR_ANY) 498 movl %eax,%edx /* second arg, syscall return value */ 499 cmpl $0,%eax /* is it < 0? 
*/ 500 setl %al /* 1 if so, 0 if not */ 501 movzbl %al,%eax /* zero-extend that */ 502 inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 503 call audit_syscall_exit 504 DISABLE_INTERRUPTS(CLBR_ANY) 505 TRACE_IRQS_OFF 506 movl TI_flags(%ebp), %ecx 507 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx 508 jne syscall_exit_work 509 movl PT_EAX(%esp),%eax /* reload syscall return value */ 510 jmp sysenter_exit 511#endif 512 513 CFI_ENDPROC 514.pushsection .fixup,"ax" 5152: movl $0,PT_FS(%esp) 516 jmp 1b 517.section __ex_table,"a" 518 .align 4 519 .long 1b,2b 520.popsection 521 PTGS_TO_GS_EX 522ENDPROC(ia32_sysenter_target) 523 524/* 525 * syscall stub including irq exit should be protected against kprobes 526 */ 527 .pushsection .kprobes.text, "ax" 528 # system call handler stub 529ENTRY(system_call) 530 RING0_INT_FRAME # can't unwind into user space anyway 531 pushl %eax # save orig_eax 532 CFI_ADJUST_CFA_OFFSET 4 533 SAVE_ALL 534 GET_THREAD_INFO(%ebp) 535 # system call tracing in operation / emulation 536 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) 537 jnz syscall_trace_entry 538 cmpl $(nr_syscalls), %eax 539 jae syscall_badsys 540syscall_call: 541 call *sys_call_table(,%eax,4) 542 movl %eax,PT_EAX(%esp) # store the return value 543syscall_exit: 544 LOCKDEP_SYS_EXIT 545 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt 546 # setting need_resched or sigpending 547 # between sampling and the iret 548 TRACE_IRQS_OFF 549 movl TI_flags(%ebp), %ecx 550 testl $_TIF_ALLWORK_MASK, %ecx # current->work 551 jne syscall_exit_work 552 553restore_all: 554 TRACE_IRQS_IRET 555restore_all_notrace: 556 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS 557 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we 558 # are returning to the kernel. 559 # See comments in process.c:copy_thread() for details. 
560 movb PT_OLDSS(%esp), %ah 561 movb PT_CS(%esp), %al 562 andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax 563 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax 564 CFI_REMEMBER_STATE 565 je ldt_ss # returning to user-space with LDT SS 566restore_nocheck: 567 RESTORE_REGS 4 # skip orig_eax/error_code 568 CFI_ADJUST_CFA_OFFSET -4 569irq_return: 570 INTERRUPT_RETURN 571.section .fixup,"ax" 572ENTRY(iret_exc) 573 pushl $0 # no error code 574 pushl $do_iret_error 575 jmp error_code 576.previous 577.section __ex_table,"a" 578 .align 4 579 .long irq_return,iret_exc 580.previous 581 582 CFI_RESTORE_STATE 583ldt_ss: 584 larl PT_OLDSS(%esp), %eax 585 jnz restore_nocheck 586 testl $0x00400000, %eax # returning to 32bit stack? 587 jnz restore_nocheck # allright, normal return 588 589#ifdef CONFIG_PARAVIRT 590 /* 591 * The kernel can't run on a non-flat stack if paravirt mode 592 * is active. Rather than try to fixup the high bits of 593 * ESP, bypass this code entirely. This may break DOSemu 594 * and/or Wine support in a paravirt VM, although the option 595 * is still available to implement the setting of the high 596 * 16-bits in the INTERRUPT_RETURN paravirt-op. 597 */ 598 cmpl $0, pv_info+PARAVIRT_enabled 599 jne restore_nocheck 600#endif 601 602/* 603 * Setup and switch to ESPFIX stack 604 * 605 * We're returning to userspace with a 16 bit stack. The CPU will not 606 * restore the high word of ESP for us on executing iret... This is an 607 * "official" bug of all the x86-compatible CPUs, which we can work 608 * around to make dosemu and wine happy. We do this by preloading the 609 * high word of ESP with the high word of the userspace ESP while 610 * compensating for the offset by changing to the ESPFIX segment with 611 * a base address that matches for the difference. 
612 */ 613 mov %esp, %edx /* load kernel esp */ 614 mov PT_OLDESP(%esp), %eax /* load userspace esp */ 615 mov %dx, %ax /* eax: new kernel esp */ 616 sub %eax, %edx /* offset (low word is 0) */ 617 PER_CPU(gdt_page, %ebx) 618 shr $16, %edx 619 mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ 620 mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ 621 pushl $__ESPFIX_SS 622 CFI_ADJUST_CFA_OFFSET 4 623 push %eax /* new kernel esp */ 624 CFI_ADJUST_CFA_OFFSET 4 625 /* Disable interrupts, but do not irqtrace this section: we 626 * will soon execute iret and the tracer was already set to 627 * the irqstate after the iret */ 628 DISABLE_INTERRUPTS(CLBR_EAX) 629 lss (%esp), %esp /* switch to espfix segment */ 630 CFI_ADJUST_CFA_OFFSET -8 631 jmp restore_nocheck 632 CFI_ENDPROC 633ENDPROC(system_call) 634 635 # perform work that needs to be done immediately before resumption 636 ALIGN 637 RING0_PTREGS_FRAME # can't unwind into user space anyway 638work_pending: 639 testb $_TIF_NEED_RESCHED, %cl 640 jz work_notifysig 641work_resched: 642 call schedule 643 LOCKDEP_SYS_EXIT 644 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt 645 # setting need_resched or sigpending 646 # between sampling and the iret 647 TRACE_IRQS_OFF 648 movl TI_flags(%ebp), %ecx 649 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other 650 # than syscall tracing? 
651 jz restore_all 652 testb $_TIF_NEED_RESCHED, %cl 653 jnz work_resched 654 655work_notifysig: # deal with pending signals and 656 # notify-resume requests 657#ifdef CONFIG_VM86 658 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) 659 movl %esp, %eax 660 jne work_notifysig_v86 # returning to kernel-space or 661 # vm86-space 662 xorl %edx, %edx 663 call do_notify_resume 664 jmp resume_userspace_sig 665 666 ALIGN 667work_notifysig_v86: 668 pushl %ecx # save ti_flags for do_notify_resume 669 CFI_ADJUST_CFA_OFFSET 4 670 call save_v86_state # %eax contains pt_regs pointer 671 popl %ecx 672 CFI_ADJUST_CFA_OFFSET -4 673 movl %eax, %esp 674#else 675 movl %esp, %eax 676#endif 677 xorl %edx, %edx 678 call do_notify_resume 679 jmp resume_userspace_sig 680END(work_pending) 681 682 # perform syscall exit tracing 683 ALIGN 684syscall_trace_entry: 685 movl $-ENOSYS,PT_EAX(%esp) 686 movl %esp, %eax 687 call syscall_trace_enter 688 /* What it returned is what we'll actually use. */ 689 cmpl $(nr_syscalls), %eax 690 jnae syscall_call 691 jmp syscall_exit 692END(syscall_trace_entry) 693 694 # perform syscall exit tracing 695 ALIGN 696syscall_exit_work: 697 testl $_TIF_WORK_SYSCALL_EXIT, %ecx 698 jz work_pending 699 TRACE_IRQS_ON 700 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call 701 # schedule() instead 702 movl %esp, %eax 703 call syscall_trace_leave 704 jmp resume_userspace 705END(syscall_exit_work) 706 CFI_ENDPROC 707 708 RING0_INT_FRAME # can't unwind into user space anyway 709syscall_fault: 710 GET_THREAD_INFO(%ebp) 711 movl $-EFAULT,PT_EAX(%esp) 712 jmp resume_userspace 713END(syscall_fault) 714 715syscall_badsys: 716 movl $-ENOSYS,PT_EAX(%esp) 717 jmp resume_userspace 718END(syscall_badsys) 719 CFI_ENDPROC 720/* 721 * End of kprobes section 722 */ 723 .popsection 724 725/* 726 * System calls that need a pt_regs pointer. 
727 */ 728#define PTREGSCALL0(name) \ 729 ALIGN; \ 730ptregs_##name: \ 731 leal 4(%esp),%eax; \ 732 jmp sys_##name; 733 734#define PTREGSCALL1(name) \ 735 ALIGN; \ 736ptregs_##name: \ 737 leal 4(%esp),%edx; \ 738 movl (PT_EBX+4)(%esp),%eax; \ 739 jmp sys_##name; 740 741#define PTREGSCALL2(name) \ 742 ALIGN; \ 743ptregs_##name: \ 744 leal 4(%esp),%ecx; \ 745 movl (PT_ECX+4)(%esp),%edx; \ 746 movl (PT_EBX+4)(%esp),%eax; \ 747 jmp sys_##name; 748 749#define PTREGSCALL3(name) \ 750 ALIGN; \ 751ptregs_##name: \ 752 leal 4(%esp),%eax; \ 753 pushl %eax; \ 754 movl PT_EDX(%eax),%ecx; \ 755 movl PT_ECX(%eax),%edx; \ 756 movl PT_EBX(%eax),%eax; \ 757 call sys_##name; \ 758 addl $4,%esp; \ 759 ret 760 761PTREGSCALL1(iopl) 762PTREGSCALL0(fork) 763PTREGSCALL0(vfork) 764PTREGSCALL3(execve) 765PTREGSCALL2(sigaltstack) 766PTREGSCALL0(sigreturn) 767PTREGSCALL0(rt_sigreturn) 768PTREGSCALL2(vm86) 769PTREGSCALL1(vm86old) 770 771/* Clone is an oddball. The 4th arg is in %edi */ 772 ALIGN; 773ptregs_clone: 774 leal 4(%esp),%eax 775 pushl %eax 776 pushl PT_EDI(%eax) 777 movl PT_EDX(%eax),%ecx 778 movl PT_ECX(%eax),%edx 779 movl PT_EBX(%eax),%eax 780 call sys_clone 781 addl $8,%esp 782 ret 783 784.macro FIXUP_ESPFIX_STACK 785/* 786 * Switch back for ESPFIX stack to the normal zerobased stack 787 * 788 * We can't call C functions using the ESPFIX stack. This code reads 789 * the high word of the segment base from the GDT and swiches to the 790 * normal stack and adjusts ESP with the matching offset. 
791 */ 792 /* fixup the stack */ 793 PER_CPU(gdt_page, %ebx) 794 mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ 795 mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ 796 shl $16, %eax 797 addl %esp, %eax /* the adjusted stack pointer */ 798 pushl $__KERNEL_DS 799 CFI_ADJUST_CFA_OFFSET 4 800 pushl %eax 801 CFI_ADJUST_CFA_OFFSET 4 802 lss (%esp), %esp /* switch to the normal stack segment */ 803 CFI_ADJUST_CFA_OFFSET -8 804.endm 805.macro UNWIND_ESPFIX_STACK 806 movl %ss, %eax 807 /* see if on espfix stack */ 808 cmpw $__ESPFIX_SS, %ax 809 jne 27f 810 movl $__KERNEL_DS, %eax 811 movl %eax, %ds 812 movl %eax, %es 813 /* switch to normal stack */ 814 FIXUP_ESPFIX_STACK 81527: 816.endm 817 818/* 819 * Build the entry stubs and pointer table with some assembler magic. 820 * We pack 7 stubs into a single 32-byte chunk, which will fit in a 821 * single cache line on all modern x86 implementations. 822 */ 823.section .init.rodata,"a" 824ENTRY(interrupt) 825.text 826 .p2align 5 827 .p2align CONFIG_X86_L1_CACHE_SHIFT 828ENTRY(irq_entries_start) 829 RING0_INT_FRAME 830vector=FIRST_EXTERNAL_VECTOR 831.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 832 .balign 32 833 .rept 7 834 .if vector < NR_VECTORS 835 .if vector <> FIRST_EXTERNAL_VECTOR 836 CFI_ADJUST_CFA_OFFSET -4 837 .endif 8381: pushl $(~vector+0x80) /* Note: always in signed byte range */ 839 CFI_ADJUST_CFA_OFFSET 4 840 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 841 jmp 2f 842 .endif 843 .previous 844 .long 1b 845 .text 846vector=vector+1 847 .endif 848 .endr 8492: jmp common_interrupt 850.endr 851END(irq_entries_start) 852 853.previous 854END(interrupt) 855.previous 856 857/* 858 * the CPU automatically disables interrupts when executing an IRQ vector, 859 * so IRQ-flags tracing has to follow that: 860 */ 861 .p2align CONFIG_X86_L1_CACHE_SHIFT 862common_interrupt: 863 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ 864 SAVE_ALL 865 TRACE_IRQS_OFF 866 movl %esp,%eax 867 call do_IRQ 
868 jmp ret_from_intr 869ENDPROC(common_interrupt) 870 CFI_ENDPROC 871 872/* 873 * Irq entries should be protected against kprobes 874 */ 875 .pushsection .kprobes.text, "ax" 876#define BUILD_INTERRUPT3(name, nr, fn) \ 877ENTRY(name) \ 878 RING0_INT_FRAME; \ 879 pushl $~(nr); \ 880 CFI_ADJUST_CFA_OFFSET 4; \ 881 SAVE_ALL; \ 882 TRACE_IRQS_OFF \ 883 movl %esp,%eax; \ 884 call fn; \ 885 jmp ret_from_intr; \ 886 CFI_ENDPROC; \ 887ENDPROC(name) 888 889#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) 890 891/* The include is where all of the SMP etc. interrupts come from */ 892#include <asm/entry_arch.h> 893 894ENTRY(coprocessor_error) 895 RING0_INT_FRAME 896 pushl $0 897 CFI_ADJUST_CFA_OFFSET 4 898 pushl $do_coprocessor_error 899 CFI_ADJUST_CFA_OFFSET 4 900 jmp error_code 901 CFI_ENDPROC 902END(coprocessor_error) 903 904ENTRY(simd_coprocessor_error) 905 RING0_INT_FRAME 906 pushl $0 907 CFI_ADJUST_CFA_OFFSET 4 908 pushl $do_simd_coprocessor_error 909 CFI_ADJUST_CFA_OFFSET 4 910 jmp error_code 911 CFI_ENDPROC 912END(simd_coprocessor_error) 913 914ENTRY(device_not_available) 915 RING0_INT_FRAME 916 pushl $-1 # mark this as an int 917 CFI_ADJUST_CFA_OFFSET 4 918 pushl $do_device_not_available 919 CFI_ADJUST_CFA_OFFSET 4 920 jmp error_code 921 CFI_ENDPROC 922END(device_not_available) 923 924#ifdef CONFIG_PARAVIRT 925ENTRY(native_iret) 926 iret 927.section __ex_table,"a" 928 .align 4 929 .long native_iret, iret_exc 930.previous 931END(native_iret) 932 933ENTRY(native_irq_enable_sysexit) 934 sti 935 sysexit 936END(native_irq_enable_sysexit) 937#endif 938 939ENTRY(overflow) 940 RING0_INT_FRAME 941 pushl $0 942 CFI_ADJUST_CFA_OFFSET 4 943 pushl $do_overflow 944 CFI_ADJUST_CFA_OFFSET 4 945 jmp error_code 946 CFI_ENDPROC 947END(overflow) 948 949ENTRY(bounds) 950 RING0_INT_FRAME 951 pushl $0 952 CFI_ADJUST_CFA_OFFSET 4 953 pushl $do_bounds 954 CFI_ADJUST_CFA_OFFSET 4 955 jmp error_code 956 CFI_ENDPROC 957END(bounds) 958 959ENTRY(invalid_op) 960 
RING0_INT_FRAME 961 pushl $0 962 CFI_ADJUST_CFA_OFFSET 4 963 pushl $do_invalid_op 964 CFI_ADJUST_CFA_OFFSET 4 965 jmp error_code 966 CFI_ENDPROC 967END(invalid_op) 968 969ENTRY(coprocessor_segment_overrun) 970 RING0_INT_FRAME 971 pushl $0 972 CFI_ADJUST_CFA_OFFSET 4 973 pushl $do_coprocessor_segment_overrun 974 CFI_ADJUST_CFA_OFFSET 4 975 jmp error_code 976 CFI_ENDPROC 977END(coprocessor_segment_overrun) 978 979ENTRY(invalid_TSS) 980 RING0_EC_FRAME 981 pushl $do_invalid_TSS 982 CFI_ADJUST_CFA_OFFSET 4 983 jmp error_code 984 CFI_ENDPROC 985END(invalid_TSS) 986 987ENTRY(segment_not_present) 988 RING0_EC_FRAME 989 pushl $do_segment_not_present 990 CFI_ADJUST_CFA_OFFSET 4 991 jmp error_code 992 CFI_ENDPROC 993END(segment_not_present) 994 995ENTRY(stack_segment) 996 RING0_EC_FRAME 997 pushl $do_stack_segment 998 CFI_ADJUST_CFA_OFFSET 4 999 jmp error_code 1000 CFI_ENDPROC
1001END(stack_segment) 1002 1003ENTRY(alignment_check) 1004 RING0_EC_FRAME 1005 pushl $do_alignment_check 1006 CFI_ADJUST_CFA_OFFSET 4 1007 jmp error_code 1008 CFI_ENDPROC 1009END(alignment_check) 1010 1011ENTRY(divide_error) 1012 RING0_INT_FRAME 1013 pushl $0 # no error code 1014 CFI_ADJUST_CFA_OFFSET 4 1015 pushl $do_divide_error 1016 CFI_ADJUST_CFA_OFFSET 4 1017 jmp error_code 1018 CFI_ENDPROC 1019END(divide_error) 1020 1021#ifdef CONFIG_X86_MCE 1022ENTRY(machine_check) 1023 RING0_INT_FRAME 1024 pushl $0 1025 CFI_ADJUST_CFA_OFFSET 4 1026 pushl machine_check_vector 1027 CFI_ADJUST_CFA_OFFSET 4 1028 jmp error_code 1029 CFI_ENDPROC 1030END(machine_check) 1031#endif 1032 1033ENTRY(spurious_interrupt_bug) 1034 RING0_INT_FRAME 1035 pushl $0 1036 CFI_ADJUST_CFA_OFFSET 4 1037 pushl $do_spurious_interrupt_bug 1038 CFI_ADJUST_CFA_OFFSET 4 1039 jmp error_code 1040 CFI_ENDPROC 1041END(spurious_interrupt_bug) 1042/* 1043 * End of kprobes section 1044 */ 1045 .popsection 1046 1047ENTRY(kernel_thread_helper) 1048 pushl $0 # fake return address for unwinder 1049 CFI_STARTPROC 1050 movl %edi,%eax 1051 call *%esi 1052 call do_exit 1053 ud2 # padding for call trace 1054 CFI_ENDPROC 1055ENDPROC(kernel_thread_helper) 1056 1057#ifdef CONFIG_XEN 1058/* Xen doesn't set %esp to be precisely what the normal sysenter 1059 entrypoint expects, so fix it up before using the normal path. */ 1060ENTRY(xen_sysenter_target) 1061 RING0_INT_FRAME 1062 addl $5*4, %esp /* remove xen-provided frame */ 1063 CFI_ADJUST_CFA_OFFSET -5*4 1064 jmp sysenter_past_esp 1065 CFI_ENDPROC 1066 1067ENTRY(xen_hypervisor_callback) 1068 CFI_STARTPROC 1069 pushl $0 1070 CFI_ADJUST_CFA_OFFSET 4 1071 SAVE_ALL 1072 TRACE_IRQS_OFF 1073 1074 /* Check to see if we got the event in the critical 1075 region in xen_iret_direct, after we've reenabled 1076 events and checked for pending events. This simulates 1077 iret instruction's behaviour where it delivers a 1078 pending interrupt when enabling interrupts. 
*/ 1079 movl PT_EIP(%esp),%eax 1080 cmpl $xen_iret_start_crit,%eax 1081 jb 1f 1082 cmpl $xen_iret_end_crit,%eax 1083 jae 1f 1084 1085 jmp xen_iret_crit_fixup 1086 1087ENTRY(xen_do_upcall) 10881: mov %esp, %eax 1089 call xen_evtchn_do_upcall 1090 jmp ret_from_intr 1091 CFI_ENDPROC 1092ENDPROC(xen_hypervisor_callback) 1093 1094# Hypervisor uses this for application faults while it executes. 1095# We get here for two reasons: 1096# 1. Fault while reloading DS, ES, FS or GS 1097# 2. Fault while executing IRET 1098# Category 1 we fix up by reattempting the load, and zeroing the segment 1099# register if the load fails. 1100# Category 2 we fix up by jumping to do_iret_error. We cannot use the 1101# normal Linux return path in this case because if we use the IRET hypercall 1102# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1103# We distinguish between categories by maintaining a status value in EAX. 1104ENTRY(xen_failsafe_callback) 1105 CFI_STARTPROC 1106 pushl %eax 1107 CFI_ADJUST_CFA_OFFSET 4 1108 movl $1,%eax 11091: mov 4(%esp),%ds 11102: mov 8(%esp),%es 11113: mov 12(%esp),%fs 11124: mov 16(%esp),%gs 1113 testl %eax,%eax 1114 popl %eax 1115 CFI_ADJUST_CFA_OFFSET -4 1116 lea 16(%esp),%esp 1117 CFI_ADJUST_CFA_OFFSET -16 1118 jz 5f 1119 addl $16,%esp 1120 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) 11215: pushl $0 # EAX == 0 => Category 1 (Bad segment) 1122 CFI_ADJUST_CFA_OFFSET 4 1123 SAVE_ALL 1124 jmp ret_from_exception 1125 CFI_ENDPROC 1126 1127.section .fixup,"ax" 11286: xorl %eax,%eax 1129 movl %eax,4(%esp) 1130 jmp 1b 11317: xorl %eax,%eax 1132 movl %eax,8(%esp) 1133 jmp 2b 11348: xorl %eax,%eax 1135 movl %eax,12(%esp) 1136 jmp 3b 11379: xorl %eax,%eax 1138 movl %eax,16(%esp) 1139 jmp 4b 1140.previous 1141.section __ex_table,"a" 1142 .align 4 1143 .long 1b,6b 1144 .long 2b,7b 1145 .long 3b,8b 1146 .long 4b,9b 1147.previous 1148ENDPROC(xen_failsafe_callback) 1149 1150#endif /* CONFIG_XEN */ 1151 1152#ifdef CONFIG_FUNCTION_TRACER 
#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * With CONFIG_DYNAMIC_FTRACE, mcount itself does nothing but return.
 */
ENTRY(mcount)
	ret
END(mcount)

/*
 * ftrace_caller: tracing entry point for CONFIG_DYNAMIC_FTRACE.
 *
 * Returns straight away (through ftrace_stub) while function_trace_stop
 * is non-zero.  Otherwise %eax, %ecx and %edx are saved, two values are
 * set up and the call at ftrace_call is made:
 *
 *   %eax = our own return address minus MCOUNT_INSN_SIZE (presumably the
 *          address of the call instruction that got us here;
 *          MCOUNT_INSN_SIZE is defined elsewhere)
 *   %edx = the word at 4(%ebp) (presumably the return address of the
 *          function being traced - this relies on %ebp being used as a
 *          frame pointer, TODO confirm)
 *
 * ftrace_call and ftrace_graph_call are exported labels on a call and a
 * jmp which both target ftrace_stub as assembled.
 * NOTE(review): these look like sites that are rewritten at run time to
 * reach the active tracer - confirm against the ftrace C code.
 */
ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax		/* return address, above the 3 saved regs */
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	popl %edx
	popl %ecx
	popl %eax
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */

/*
 * mcount: tracing entry point without CONFIG_DYNAMIC_FTRACE.
 *
 * Returns immediately while function_trace_stop is non-zero.  Otherwise:
 *  - if ftrace_trace_function is not ftrace_stub, go to "trace", which
 *    calls it with the same %eax/%edx values that ftrace_caller computes
 *    in the dynamic case;
 *  - else, with CONFIG_FUNCTION_GRAPH_TRACER, go to ftrace_graph_caller
 *    if ftrace_graph_return is not ftrace_stub or ftrace_graph_entry is
 *    not ftrace_graph_entry_stub;
 *  - else just return.
 */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax		/* return address, above the 3 saved regs */
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: hand off to prepare_ftrace_return().
 *
 * Returns immediately while function_trace_stop is non-zero.  Otherwise
 * %eax, %ecx and %edx are saved around a call made with:
 *
 *   %eax = the address 4(%ebp), i.e. a pointer to that stack slot, not
 *          its contents (lea)
 *   %edx = our own return address minus MCOUNT_INSN_SIZE
 *   %ecx = the word at (%ebp)
 */
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx
	lea 0x4(%ebp), %eax
	movl (%ebp), %ecx
	subl $MCOUNT_INSN_SIZE, %edx
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

/*
 * return_to_handler: %eax and %edx are preserved across a call to
 * ftrace_return_to_handler(), which is given %ebp in %eax.  Whatever that
 * call returns in %eax is used as the address to jump to afterwards.
 */
.globl return_to_handler
return_to_handler:
	pushl %eax
	pushl %edx
	movl %ebp, %eax
	call ftrace_return_to_handler
	movl %eax, %ecx			/* jump target; %eax is restored below */
	popl %edx
	popl %eax
	jmp *%ecx
#endif

.section .rodata,"a"
#include "syscall_table_32.S"

/* size in bytes of everything emitted since the sys_call_table label */
syscall_table_size=(.-sys_call_table)

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

/*
 * page_fault pushes the address of do_page_fault and continues into
 * error_code, the common tail that general_protection also jumps to.
 *
 * On arrival at error_code the handler address is on top of the stack and
 * the error code is just above it.  The pushes below (%fs, %es, %ds, %eax,
 * %ebp, %edi, %esi, %edx, %ecx, %ebx) complete the register layout
 * described at the top of this file, which leaves the handler address in
 * the %gs slot and the error code in the orig_eax slot.
 *
 * The handler is then called with:
 *   %eax = pointer to the saved registers
 *   %edx = error code
 * and control continues at ret_from_exception.
 */
ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl %fs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET fs, 0*/
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
*
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 *
 * FIX_STACK arguments:
 *   offset - added to TSS_sysenter_sp0 to form the %esp-relative
 *            displacement from which the new stack pointer is loaded
 *   ok     - label to branch to, leaving the stack untouched, when the
 *            saved %cs at 4(%esp) is not __KERNEL_CS
 *   label  - name of the label emitted on the stack-switching movl
 *
 * The frame pushed onto the new stack is: eflags (pushfl), __KERNEL_CS
 * and sysenter_past_esp as the return eip.
 */
.macro FIX_STACK offset ok label
	cmpw $__KERNEL_CS, 4(%esp)
	jne \ok
\label:
	movl TSS_sysenter_sp0 + \offset(%esp), %esp
	CFI_DEF_CFA esp, 0
	CFI_UNDEFINED eip
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	pushl $__KERNEL_CS
	CFI_ADJUST_CFA_OFFSET 4
	pushl $sysenter_past_esp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0
.endm

/*
 * debug: debug trap entry.
 *
 * If the saved eip on top of the stack is ia32_sysenter_target, FIX_STACK
 * is applied first (its stack-switching instruction is labelled
 * debug_esp_fix_insn).  After that -1 is pushed in place of an error
 * code, all registers are saved, and do_debug is called with:
 *   %eax = pointer to the saved registers
 *   %edx = 0 (error code)
 */
ENTRY(debug)
	RING0_INT_FRAME
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
*/
ENTRY(nmi)
	RING0_INT_FRAME
	/*
	 * Are we running on the espfix stack segment?  %eax is borrowed to
	 * read %ss; popl does not change EFLAGS, so the je below still acts
	 * on the cmpw.
	 */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_espfix_stack
	/* saved eip == sysenter entry point: the NMI hit on the sysenter path */
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax	/* offset of %esp within the stack area */
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax			/* flags from the cmpl survive the popl */
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	/*
	 * 12(%esp) is where the saved eip of a frame sitting directly under
	 * ours would be: did a debug trap hit the sysenter entry point?
	 */
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax			/* fills the orig_eax slot of the layout */
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_all_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	/* one 12-byte frame (eip, cs, eflags) is on the wrong stack */
	FIX_STACK 12, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	/*
	 * 16(%esp) is the saved %cs of the frame under ours.  The stack is
	 * only fixed if that is __KERNEL_CS and our own saved eip lies in
	 * the range debug ... debug_esp_fix_insn (inclusive).
	 */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	/* two 12-byte frames (ours and the debug one) are on the wrong stack */
	FIX_STACK 24, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	/*
	 * pushl %esp stored the value %esp had after the %ss push; adding 4
	 * makes the stored ss:esp pair point at the original iret frame.
	 */
	addl $4, (%esp)
	/* copy the iret frame of 12 bytes */
	/*
	 * Each push moves %esp down by 4, so the fixed 16(%esp) operand picks
	 * up eflags, then cs, then eip of the original frame.
	 */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax			/* fills the orig_eax slot of the layout */
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	/*
	 * 4 bytes of orig_eax plus the 12-byte copied frame sit between %esp
	 * and the ss:esp pair built above (assuming RESTORE_REGS leaves %esp
	 * at the orig_eax slot - TODO confirm).
	 */
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
	CFI_ENDPROC
END(nmi)

/*
 * int3: breakpoint trap entry.  Pushes -1 in place of an error code,
 * saves all registers and calls do_int3 with:
 *   %eax = pointer to the saved registers
 *   %edx = 0 (error code)
 */
ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

/*
 * general_protection: pushes the address of do_general_protection on top
 * of the error code already on the stack and continues in the common
 * error_code path.
 */
ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(general_protection)

/*
 * End of kprobes section
 */
	.popsection


