1
2
3
4
5
6
7#include <linux/signal.h>
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/string.h>
12#include <linux/types.h>
13#include <linux/ptrace.h>
14#include <linux/mman.h>
15#include <linux/mm.h>
16#include <linux/smp.h>
17#include <linux/interrupt.h>
18#include <linux/init.h>
19#include <linux/tty.h>
20#include <linux/vt_kern.h>
21#include <linux/highmem.h>
22#include <linux/bootmem.h>
23#include <linux/vmalloc.h>
24#include <linux/module.h>
25#include <linux/kprobes.h>
26#include <linux/uaccess.h>
27#include <linux/kdebug.h>
28#include <linux/kprobes.h>
29
30#include <asm/system.h>
31#include <asm/desc.h>
32#include <asm/segment.h>
33
/*
 * Defined outside this file.  do_page_fault() calls it as die("Oops", ...)
 * once a kernel-mode fault has been found to be unrecoverable.
 */
extern void die(const char *,struct pt_regs *,long);
35
#ifdef CONFIG_KPROBES
/*
 * Offer a page fault to the kprobes fault handler.
 *
 * Only faults that did not come from user mode (as reported by
 * user_mode_vm()) are offered.  Returns 1 when a kprobe is currently
 * running and its fault handler claimed trap 14, otherwise 0.
 */
static inline int notify_page_fault(struct pt_regs *regs)
{
	int claimed;

	if (user_mode_vm(regs))
		return 0;

	/*
	 * The kprobe state is queried with preemption disabled --
	 * presumably because kprobe_running() looks at per-CPU data
	 * (TODO confirm against the kprobes implementation).
	 */
	preempt_disable();
	claimed = kprobe_running() && kprobe_fault_handler(regs, 14);
	preempt_enable();

	return claimed;
}
#else
/* Without CONFIG_KPROBES nobody can claim the fault: always report 0. */
static inline int notify_page_fault(struct pt_regs *regs)
{
	return 0;
}
#endif
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72static inline unsigned long get_segment_eip(struct pt_regs *regs,
73 unsigned long *eip_limit)
74{
75 unsigned long eip = regs->eip;
76 unsigned seg = regs->xcs & 0xffff;
77 u32 seg_ar, seg_limit, base, *desc;
78
79
80 if (unlikely(regs->eflags & VM_MASK)) {
81 base = seg << 4;
82 *eip_limit = base + 0xffff;
83 return base + (eip & 0xffff);
84 }
85
86
87 *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
88
89
90 if (likely(SEGMENT_IS_FLAT_CODE(seg)))
91 return eip;
92
93
94
95
96 __asm__ ("larl %3,%0; lsll %3,%1"
97 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
98 if ((~seg_ar & 0x9800) || eip > seg_limit) {
99 *eip_limit = 0;
100 return 1;
101 }
102
103
104
105
106 if (seg & (1<<2)) {
107
108 mutex_lock(¤t->mm->context.lock);
109 desc = current->mm->context.ldt;
110 desc = (void *)desc + (seg & ~7);
111 } else {
112
113 desc = (u32 *)get_cpu_gdt_table(get_cpu());
114 desc = (void *)desc + (seg & ~7);
115 }
116
117
118 base = get_desc_base((unsigned long *)desc);
119
120 if (seg & (1<<2)) {
121 mutex_unlock(¤t->mm->context.lock);
122 } else
123 put_cpu();
124
125
126
127 seg_limit += base;
128 if (seg_limit < *eip_limit && seg_limit >= base)
129 *eip_limit = seg_limit;
130 return eip + base;
131}
132
133
134
135
136
/*
 * Decide whether the instruction at the faulting CS:EIP is a prefetch.
 *
 * Up to 15 bytes are read from the linear address returned by
 * get_segment_eip().  Prefix bytes (0x26/0x2E/0x36/0x3E, 0x64-0x67,
 * 0xF0/0xF2/0xF3) are skipped; the scan ends at the first other byte.
 * The result is 1 only when that byte is 0x0F and the byte after it is
 * 0x0D or 0x18.  Any read past the segment limit, or any failing
 * probe_kernel_address(), yields 0.
 *
 * @addr is accepted for interface reasons but not used.
 */
static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
{
	unsigned long limit;
	unsigned char *ip = (unsigned char *)get_segment_eip(regs, &limit);
	int n;

	for (n = 0; n < 15; n++) {
		unsigned char byte;
		unsigned char low;

		if (ip > (unsigned char *)limit)
			return 0;
		if (probe_kernel_address(ip, byte))
			return 0;

		low = byte & 0x0f;
		ip++;

		switch (byte & 0xf0) {
		case 0x20:
		case 0x30:
			/* 0x26, 0x2E, 0x36, 0x3E keep the scan going */
			if ((low & 7) != 0x6)
				return 0;
			continue;

		case 0x60:
			/* 0x64 .. 0x67 keep the scan going */
			if ((low & 0xC) != 0x4)
				return 0;
			continue;

		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 keep the scan going */
			if (low && (low >> 1) != 1)
				return 0;
			continue;

		case 0x00:
			/* two-byte opcode 0x0F 0x0D or 0x0F 0x18 */
			if (ip > (unsigned char *)limit)
				return 0;
			if (probe_kernel_address(ip, byte))
				return 0;
			return (low == 0xF) &&
				(byte == 0x0D || byte == 0x18);

		default:
			return 0;
		}
	}

	/* 15 prefix bytes without reaching an opcode */
	return 0;
}
191
192static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
193 unsigned long error_code)
194{
195 if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
196 boot_cpu_data.x86 >= 6)) {
197
198 if (nx_enabled && (error_code & 16))
199 return 0;
200 return __is_prefetch(regs, addr);
201 }
202 return 0;
203}
204
205static noinline void force_sig_info_fault(int si_signo, int si_code,
206 unsigned long address, struct task_struct *tsk)
207{
208 siginfo_t info;
209
210 info.si_signo = si_signo;
211 info.si_errno = 0;
212 info.si_code = si_code;
213 info.si_addr = (void __user *)address;
214 force_sig_info(si_signo, &info, tsk);
215}
216
/*
 * Invalid-opcode handler, defined outside this file.  do_page_fault()
 * forwards to it in its CONFIG_X86_F00F_BUG path.
 */
fastcall void do_invalid_op(struct pt_regs *, unsigned long);
218
219static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
220{
221 unsigned index = pgd_index(address);
222 pgd_t *pgd_k;
223 pud_t *pud, *pud_k;
224 pmd_t *pmd, *pmd_k;
225
226 pgd += index;
227 pgd_k = init_mm.pgd + index;
228
229 if (!pgd_present(*pgd_k))
230 return NULL;
231
232
233
234
235
236
237
238 pud = pud_offset(pgd, address);
239 pud_k = pud_offset(pgd_k, address);
240 if (!pud_present(*pud_k))
241 return NULL;
242
243 pmd = pmd_offset(pud, address);
244 pmd_k = pmd_offset(pud_k, address);
245 if (!pmd_present(*pmd_k))
246 return NULL;
247 if (!pmd_present(*pmd)) {
248 set_pmd(pmd, *pmd_k);
249 arch_flush_lazy_mmu_mode();
250 } else
251 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
252 return pmd_k;
253}
254
255
256
257
258
259
260static inline int vmalloc_fault(unsigned long address)
261{
262 unsigned long pgd_paddr;
263 pmd_t *pmd_k;
264 pte_t *pte_k;
265
266
267
268
269
270
271
272 pgd_paddr = read_cr3();
273 pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
274 if (!pmd_k)
275 return -1;
276 pte_k = pte_offset_kernel(pmd_k, address);
277 if (!pte_present(*pte_k))
278 return -1;
279 return 0;
280}
281
/*
 * When non-zero (the default), do_page_fault() logs a rate-limited
 * "segfault at ..." line for user-mode faults whose SIGSEGV is unhandled.
 */
int show_unhandled_signals = 1;
283
284
285
286
287
288
289
290
291
292
293
294
295
296fastcall void __kprobes do_page_fault(struct pt_regs *regs,
297 unsigned long error_code)
298{
299 struct task_struct *tsk;
300 struct mm_struct *mm;
301 struct vm_area_struct * vma;
302 unsigned long address;
303 int write, si_code;
304 int fault;
305
306
307
308
309 trace_hardirqs_fixup();
310
311
312 address = read_cr2();
313
314 tsk = current;
315
316 si_code = SEGV_MAPERR;
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331 if (unlikely(address >= TASK_SIZE)) {
332 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
333 return;
334 if (notify_page_fault(regs))
335 return;
336
337
338
339
340 goto bad_area_nosemaphore;
341 }
342
343 if (notify_page_fault(regs))
344 return;
345
346
347
348 if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
349 local_irq_enable();
350
351 mm = tsk->mm;
352
353
354
355
356
357 if (in_atomic() || !mm)
358 goto bad_area_nosemaphore;
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375 if (!down_read_trylock(&mm->mmap_sem)) {
376 if ((error_code & 4) == 0 &&
377 !search_exception_tables(regs->eip))
378 goto bad_area_nosemaphore;
379 down_read(&mm->mmap_sem);
380 }
381
382 vma = find_vma(mm, address);
383 if (!vma)
384 goto bad_area;
385 if (vma->vm_start <= address)
386 goto good_area;
387 if (!(vma->vm_flags & VM_GROWSDOWN))
388 goto bad_area;
389 if (error_code & 4) {
390
391
392
393
394
395
396 if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
397 goto bad_area;
398 }
399 if (expand_stack(vma, address))
400 goto bad_area;
401
402
403
404
405good_area:
406 si_code = SEGV_ACCERR;
407 write = 0;
408 switch (error_code & 3) {
409 default:
410
411 case 2:
412 if (!(vma->vm_flags & VM_WRITE))
413 goto bad_area;
414 write++;
415 break;
416 case 1:
417 goto bad_area;
418 case 0:
419 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
420 goto bad_area;
421 }
422
423 survive:
424
425
426
427
428
429 fault = handle_mm_fault(mm, vma, address, write);
430 if (unlikely(fault & VM_FAULT_ERROR)) {
431 if (fault & VM_FAULT_OOM)
432 goto out_of_memory;
433 else if (fault & VM_FAULT_SIGBUS)
434 goto do_sigbus;
435 BUG();
436 }
437 if (fault & VM_FAULT_MAJOR)
438 tsk->maj_flt++;
439 else
440 tsk->min_flt++;
441
442
443
444
445 if (regs->eflags & VM_MASK) {
446 unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
447 if (bit < 32)
448 tsk->thread.screen_bitmap |= 1 << bit;
449 }
450 up_read(&mm->mmap_sem);
451 return;
452
453
454
455
456
457bad_area:
458 up_read(&mm->mmap_sem);
459
460bad_area_nosemaphore:
461
462 if (error_code & 4) {
463
464
465
466 local_irq_enable();
467
468
469
470
471
472 if (is_prefetch(regs, address, error_code))
473 return;
474
475 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
476 printk_ratelimit()) {
477 printk("%s%s[%d]: segfault at %08lx eip %08lx "
478 "esp %08lx error %lx\n",
479 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
480 tsk->comm, task_pid_nr(tsk), address, regs->eip,
481 regs->esp, error_code);
482 }
483 tsk->thread.cr2 = address;
484
485 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
486 tsk->thread.trap_no = 14;
487 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
488 return;
489 }
490
491#ifdef CONFIG_X86_F00F_BUG
492
493
494
495 if (boot_cpu_data.f00f_bug) {
496 unsigned long nr;
497
498 nr = (address - idt_descr.address) >> 3;
499
500 if (nr == 6) {
501 do_invalid_op(regs, 0);
502 return;
503 }
504 }
505#endif
506
507no_context:
508
509 if (fixup_exception(regs))
510 return;
511
512
513
514
515
516
517 if (is_prefetch(regs, address, error_code))
518 return;
519
520
521
522
523
524
525 bust_spinlocks(1);
526
527 if (oops_may_print()) {
528 __typeof__(pte_val(__pte(0))) page;
529
530#ifdef CONFIG_X86_PAE
531 if (error_code & 16) {
532 pte_t *pte = lookup_address(address);
533
534 if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
535 printk(KERN_CRIT "kernel tried to execute "
536 "NX-protected page - exploit attempt? "
537 "(uid: %d)\n", current->uid);
538 }
539#endif
540 if (address < PAGE_SIZE)
541 printk(KERN_ALERT "BUG: unable to handle kernel NULL "
542 "pointer dereference");
543 else
544 printk(KERN_ALERT "BUG: unable to handle kernel paging"
545 " request");
546 printk(" at virtual address %08lx\n",address);
547 printk(KERN_ALERT "printing eip: %08lx ", regs->eip);
548
549 page = read_cr3();
550 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
551#ifdef CONFIG_X86_PAE
552 printk("*pdpt = %016Lx ", page);
553 if ((page >> PAGE_SHIFT) < max_low_pfn
554 && page & _PAGE_PRESENT) {
555 page &= PAGE_MASK;
556 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
557 & (PTRS_PER_PMD - 1)];
558 printk(KERN_CONT "*pde = %016Lx ", page);
559 page &= ~_PAGE_NX;
560 }
561#else
562 printk("*pde = %08lx ", page);
563#endif
564
565
566
567
568
569
570
571 if ((page >> PAGE_SHIFT) < max_low_pfn
572 && (page & _PAGE_PRESENT)
573 && !(page & _PAGE_PSE)) {
574 page &= PAGE_MASK;
575 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
576 & (PTRS_PER_PTE - 1)];
577 printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
578 }
579
580 printk("\n");
581 }
582
583 tsk->thread.cr2 = address;
584 tsk->thread.trap_no = 14;
585 tsk->thread.error_code = error_code;
586 die("Oops", regs, error_code);
587 bust_spinlocks(0);
588 do_exit(SIGKILL);
589
590
591
592
593
594out_of_memory:
595 up_read(&mm->mmap_sem);
596 if (is_global_init(tsk)) {
597 yield();
598 down_read(&mm->mmap_sem);
599 goto survive;
600 }
601 printk("VM: killing process %s\n", tsk->comm);
602 if (error_code & 4)
603 do_group_exit(SIGKILL);
604 goto no_context;
605
606do_sigbus:
607 up_read(&mm->mmap_sem);
608
609
610 if (!(error_code & 4))
611 goto no_context;
612
613
614 if (is_prefetch(regs, address, error_code))
615 return;
616
617 tsk->thread.cr2 = address;
618 tsk->thread.error_code = error_code;
619 tsk->thread.trap_no = 14;
620 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
621}
622
623void vmalloc_sync_all(void)
624{
625
626
627
628
629
630
631 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
632 static unsigned long start = TASK_SIZE;
633 unsigned long address;
634
635 if (SHARED_KERNEL_PMD)
636 return;
637
638 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
639 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
640 if (!test_bit(pgd_index(address), insync)) {
641 unsigned long flags;
642 struct page *page;
643
644 spin_lock_irqsave(&pgd_lock, flags);
645 for (page = pgd_list; page; page =
646 (struct page *)page->index)
647 if (!vmalloc_sync_one(page_address(page),
648 address)) {
649 BUG_ON(page != pgd_list);
650 break;
651 }
652 spin_unlock_irqrestore(&pgd_lock, flags);
653 if (!page)
654 set_bit(pgd_index(address), insync);
655 }
656 if (address == start && test_bit(pgd_index(address), insync))
657 start = address + PGDIR_SIZE;
658 }
659}
660