/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'syscall_exit':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *       C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - orig_eax
 *	2C(%esp) - %eip
 *	30(%esp) - %cs
 *	34(%esp) - %eflags
 *	38(%esp) - %oldesp
 *	3C(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE	   0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#define nr_syscalls ((syscall_table_size)/4)

#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
#define resume_kernel		restore_nocheck
#endif

/*
 * TRACE_IRQS_IRET: with CONFIG_TRACE_IRQFLAGS, invoke TRACE_IRQS_ON only
 * when the EFLAGS image saved in the frame has X86_EFLAGS_IF set.
 */
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

/*
 * SAVE_ALL: clear DF, then push %fs, %es, %ds, %eax, %ebp, %edi, %esi,
 * %edx, %ecx, %ebx in that order (so %ebx ends up at 0(%esp), matching the
 * layout table at the top of this file).  Afterwards %ds and %es are loaded
 * with __USER_DS and %fs with __KERNEL_PERCPU.  %edx is used as scratch
 * after it has been saved.
 */
#define SAVE_ALL \
	cld; \
	pushl %fs; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET fs, 0;*/\
	pushl %es; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET es, 0;*/\
	pushl %ds; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET ds, 0;*/\
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET eax, 0;\
	pushl %ebp; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebp, 0;\
	pushl %edi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edi, 0;\
	pushl %esi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET esi, 0;\
	pushl %edx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edx, 0;\
	pushl %ecx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ecx, 0;\
	pushl %ebx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebx, 0;\
	movl $(__USER_DS), %edx; \
	movl %edx, %ds; \
	movl %edx, %es; \
	movl $(__KERNEL_PERCPU), %edx; \
	movl %edx, %fs

/* RESTORE_INT_REGS: pop %ebx .. %eax, undoing the integer pushes of SAVE_ALL. */
#define RESTORE_INT_REGS \
	popl %ebx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebx;\
	popl %ecx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ecx;\
	popl %edx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edx;\
	popl %esi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE esi;\
	popl %edi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edi;\
	popl %ebp;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebp;\
	popl %eax;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE eax

/*
 * RESTORE_REGS: RESTORE_INT_REGS followed by popping %ds, %es and %fs.
 * Each segment pop (labels 1-3) has an __ex_table entry; its fixup
 * (labels 4-6) stores 0 into the stack slot and retries the same pop.
 */
#define RESTORE_REGS	\
	RESTORE_INT_REGS; \
1:	popl %ds;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE ds;*/\
2:	popl %es;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE es;*/\
3:	popl %fs;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax";	\
4:	movl $0,(%esp);	\
	jmp 1b;		\
5:	movl $0,(%esp);	\
	jmp 2b;		\
6:	movl $0,(%esp);	\
	jmp 3b;		\
.section __ex_table,"a";\
	.align 4;	\
	.long 1b,4b;	\
	.long 2b,5b;	\
	.long 3b,6b;	\
.popsection

/*
 * CFI frame descriptions.  RING0_INT_FRAME places the CFA at esp+3*4,
 * RING0_EC_FRAME at esp+4*4 (one extra word on the stack); both record eip
 * at CFA-3*4.  RING0_PTREGS_FRAME describes a full register frame in terms
 * of the PT_* offsets.
 */
#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP

/*
 * ret_from_fork: call schedule_tail with %eax preserved across the call,
 * reload thread_info into %ebp, reset EFLAGS to 0x0202 and leave through
 * syscall_exit.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
/*
 * resume_kernel: with interrupts disabled, return via restore_nocheck when
 * preempt_count is non-zero.  Otherwise call preempt_schedule_irq for as
 * long as _TIF_NEED_RESCHED is set and the saved EFLAGS has IF set.
 */
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
END(resume_kernel)
#endif
	CFI_ENDPROC

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

	# sysenter call handler stub
ENTRY(ia32_sysenter_target)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace that until
	 * enough kernel state is set up to call TRACE_IRQS_OFF - and
	 * we immediately enable interrupts at that point anyway.
	 */
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	orl $X86_EFLAGS_IF, (%esp)
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
	movl %ebp,PT_EBP(%esp)
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz sysenter_audit
sysenter_do_call:
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	ENABLE_INTERRUPTS_SYSEXIT

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
	testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
	call audit_syscall_entry
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

sysexit_audit:
	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
	cmpl $0,%eax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%eax		/* zero-extend that */
	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

	CFI_ENDPROC
/* Fixup for the %fs reload at label 1 in sysenter_exit: zero the slot, retry. */
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
ENDPROC(ia32_sysenter_target)

	# system call handler stub
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work

restore_all:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	TRACE_IRQS_IRET
restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp			# skip orig_eax/error_code
	CFI_ADJUST_CFA_OFFSET -4
irq_return:
	INTERRUPT_RETURN
.section .fixup,"ax"
ENTRY(iret_exc)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long irq_return,iret_exc
.previous

	CFI_RESTORE_STATE
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, pv_info+PARAVIRT_enabled
	jne restore_nocheck
#endif

	/* If returning to userspace with 16bit stack,
	 * try to fix the higher word of ESP, as the CPU
	 * won't restore it.
	 * This is an "official" bug of all the x86-compatible
	 * CPUs, which we can try to work around to make
	 * dosemu and wine happy. */
	movl PT_OLDESP(%esp), %eax
	movl %esp, %edx
	call patch_espfix_desc
	pushl $__ESPFIX_SS
	CFI_ADJUST_CFA_OFFSET 4
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	DISABLE_INTERRUPTS(CLBR_EAX)
	TRACE_IRQS_OFF
	lss (%esp), %esp
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC
ENDPROC(system_call)

	# perform work that needs to be done immediately before resumption
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
END(work_pending)

	# perform syscall entry tracing
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testb $_TIF_WORK_SYSCALL_EXIT, %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
					# schedule() instead
	movl %esp, %eax
	call syscall_trace_leave
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC

	RING0_INT_FRAME			# can't unwind into user space anyway
syscall_fault:
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_fault)

syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_badsys)
	CFI_ENDPROC

#define FIXUP_ESPFIX_STACK \
	/* since we are on a wrong stack, we cant make it a C code :( */ \
	PER_CPU(gdt_page, %ebx); \
	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
	addl %esp, %eax; \
	pushl $__KERNEL_DS; \
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4; \
	lss (%esp), %esp; \
	CFI_ADJUST_CFA_OFFSET -8;
#define UNWIND_ESPFIX_STACK \
	movl %ss, %eax; \
	/* see if on espfix stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	jne 27f; \
	movl $__KERNEL_DS, %eax; \
	movl %eax, %ds; \
	movl %eax, %es; \
	/* switch to normal stack */ \
	FIXUP_ESPFIX_STACK; \
27:;

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
.section .init.rodata,"a"
ENTRY(interrupt)
.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -4
      .endif
1:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 4
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.long 1b
      .text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
ENDPROC(common_interrupt)
	CFI_ENDPROC

/*
 * BUILD_INTERRUPT(name, nr): emit entry point `name` that pushes ~(nr),
 * saves all registers, calls smp_<name> with %eax = %esp and leaves through
 * ret_from_intr.  The whole body is one logical line, so every statement,
 * including TRACE_IRQS_OFF, is terminated with an explicit ';' rather than
 * relying on a macro expansion to supply the separator.
 */
#define BUILD_INTERRUPT(name, nr)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF;			\
	movl %esp,%eax;			\
	call smp_##name;		\
	jmp ret_from_intr;		\
	CFI_ENDPROC;			\
ENDPROC(name)

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

/*
 * Exception stubs.  Stubs declared with RING0_INT_FRAME push an error-code
 * word themselves ($0 or $-1) and then the handler address; stubs declared
 * with RING0_EC_FRAME push only the handler address.  All continue in
 * error_code.
 */
ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_simd_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(simd_coprocessor_error)

ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_device_not_available
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
.section __ex_table,"a"
	.align 4
	.long native_iret, iret_exc
.previous
END(native_iret)

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
#endif

ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(overflow)

ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(bounds)

ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl $do_invalid_TSS
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(invalid_TSS)

/*
 * The next three stubs use RING0_EC_FRAME and push only the handler
 * address before continuing in error_code.
 */
ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl $do_segment_not_present
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(segment_not_present)

ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl $do_stack_segment
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(stack_segment)

ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl $do_alignment_check
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(alignment_check)

ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	/* no '$': pushes the value stored at machine_check_vector, not its address */
	pushl machine_check_vector
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_spurious_interrupt_bug
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(spurious_interrupt_bug)

/*
 * kernel_thread_helper: copy %edx into %eax and push it, call the function
 * whose address is in %ebx, then push that call's %eax and call do_exit.
 */
ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	push %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	push %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entrypoint expects, so fix it up before using the normal path. */
ENTRY(xen_sysenter_target)
	RING0_INT_FRAME
	addl $5*4, %esp		/* remove xen-provided frame */
	CFI_ADJUST_CFA_OFFSET -5*4
	jmp sysenter_past_esp
	CFI_ENDPROC

/*
 * xen_hypervisor_callback: push a zero orig_eax word and save all
 * registers.  If the saved EIP lies in
 * [xen_iret_start_crit, xen_iret_end_crit) control goes to
 * xen_iret_crit_fixup; otherwise it continues at xen_do_upcall, which
 * calls xen_evtchn_do_upcall with %eax = %esp and leaves via ret_from_intr.
 */
ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

	jmp  xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov %esp, %eax
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
/*
 * %eax starts at 1.  Each segment load (labels 1-4) has an __ex_table
 * entry whose fixup (labels 6-9) zeroes %eax, zeroes the corresponding
 * stack slot and retries the load; so %eax == 0 after the loads means at
 * least one load faulted.  The flags set by testl are still valid at the
 * jz because only popl and lea execute in between.
 */
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl $1,%eax
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
	testl %eax,%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	lea 16(%esp),%esp
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
	addl $16,%esp
	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
5:	pushl $0		# EAX == 0 => Category 1 (Bad segment)
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,6b
	.long 2b,7b
	.long 3b,8b
	.long 4b,9b
.previous
ENDPROC(xen_failsafe_callback)

#endif /* CONFIG_XEN */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

/*
 * ftrace_caller: return immediately (via ftrace_stub) when
 * function_trace_stop is non-zero.  Otherwise save %eax/%ecx/%edx, load
 * %eax with the word at 0xc(%esp) minus MCOUNT_INSN_SIZE and %edx with the
 * word at 0x4(%ebp), and execute the call at the global label ftrace_call.
 * NOTE(review): ftrace_call / ftrace_graph_call target ftrace_stub here;
 * presumably they are patched at run time - confirm against the ftrace code.
 */
ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	popl %edx
	popl %ecx
	popl %eax
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */

/*
 * mcount (non-dynamic): return at once when function_trace_stop is set.
 * Branch to `trace` when ftrace_trace_function is not ftrace_stub, and
 * (with the graph tracer) to ftrace_graph_caller when either graph hook
 * differs from its stub.
 */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: with %eax/%ecx/%edx saved, call
 * prepare_ftrace_return with %eax = address of the slot at 0x4(%ebp) and
 * %edx = the word at 0xc(%esp) minus MCOUNT_INSN_SIZE.
 */
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx
	lea 0x4(%ebp), %eax
	subl $MCOUNT_INSN_SIZE, %edx
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

/*
 * return_to_handler: the leading "pushl $0" reserves a stack slot.  After
 * ftrace_return_to_handler returns, its %eax is stored into that slot
 * (0xc(%esp) once the three registers are pushed), so the final ret
 * transfers control to the returned address with %eax/%ecx/%edx restored.
 */
.globl return_to_handler
return_to_handler:
	pushl $0
	pushl %eax
	pushl %ecx
	pushl %edx
	call ftrace_return_to_handler
	movl %eax, 0xc(%esp)
	popl %edx
	popl %ecx
	popl %eax
	ret
#endif

.section .rodata,"a"
#include "syscall_table_32.S"

syscall_table_size=(.-sys_call_table)

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
	/* the function address is in %fs's slot on the stack */
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	pushl %fs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET fs, 0*/
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	/* %ecx = the %fs value pushed just above; %esp is back at the %ebx slot */
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_REGISTER es, ecx*/
	movl PT_FS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	/* handler address has been read out; the slot now holds the saved %fs */
	mov  %ecx, PT_FS(%esp)
	/*CFI_REL_OFFSET fs, ES*/
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
	CFI_DEF_CFA esp, 0;			\
	CFI_UNDEFINED eip;			\
	pushfl;					\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $__KERNEL_CS;			\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $sysenter_past_esp;		\
	CFI_ADJUST_CFA_OFFSET 4;		\
	CFI_REL_OFFSET eip, 0

/*
 * debug: when the saved EIP equals ia32_sysenter_target, run FIX_STACK
 * first; then push -1 as orig_eax, save all registers and call do_debug
 * with %eax = pt_regs pointer and %edx = 0.
 */
ENTRY(debug)
	RING0_INT_FRAME
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_espfix_stack
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_nocheck_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	addw $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
	CFI_ENDPROC
END(nmi)

ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(general_protection)

/*
 * End of kprobes section
 */
	.popsection

