1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define __KERNEL_SYSCALLS__
20#include <stdarg.h>
21
22#include <linux/compiler.h>
23#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/smp.h>
28#include <linux/smp_lock.h>
29#include <linux/stddef.h>
30#include <linux/unistd.h>
31#include <linux/ptrace.h>
32#include <linux/slab.h>
33#include <linux/vmalloc.h>
34#include <linux/user.h>
35#include <linux/a.out.h>
36#include <linux/interrupt.h>
37#include <linux/config.h>
38#include <linux/delay.h>
39#include <linux/reboot.h>
40#include <linux/init.h>
41#include <linux/ctype.h>
42#include <linux/slab.h>
43
44#include <asm/uaccess.h>
45#include <asm/pgtable.h>
46#include <asm/system.h>
47#include <asm/io.h>
48#include <asm/ldt.h>
49#include <asm/processor.h>
50#include <asm/i387.h>
51#include <asm/desc.h>
52#include <asm/mmu_context.h>
53#include <asm/pda.h>
54#include <asm/prctl.h>
55#include <asm/kdebug.h>
56#include <asm/proto.h>
57#include <asm/apic.h>
58
59#include <linux/irq.h>
60
/*
 * Defined outside this file; copy_thread() stores its address as the
 * first rip of a newly created task.
 */
asmlinkage extern void ret_from_fork(void);

/*
 * Count of outstanding disable_hlt() calls (decremented by enable_hlt()).
 * default_idle() only halts the CPU while this is zero.
 */
int hlt_counter;

/*
 * Optional idle routine.  When non-NULL, cpu_idle() calls it instead of
 * default_idle().  Assigned by select_idle_routine() and by the "idle="
 * boot option handler.
 */
void (*pm_idle)(void);

/*
 * Optional power-off hook; machine_power_off() calls it when non-NULL.
 */
void (*pm_power_off)(void);
74
75void disable_hlt(void)
76{
77 hlt_counter++;
78}
79
80void enable_hlt(void)
81{
82 hlt_counter--;
83}
84
85
86
87
88
89static void default_idle(void)
90{
91 if (!hlt_counter) {
92 __cli();
93 if (!current->need_resched)
94 safe_halt();
95 else
96 __sti();
97 }
98}
99
100
101
102
103
104
105static void poll_idle (void)
106{
107 int oldval;
108
109 __sti();
110
111
112
113
114
115 oldval = xchg(¤t->need_resched, -1);
116
117 if (!oldval)
118 asm volatile(
119 "2:"
120 "cmpl $-1, %0;"
121 "rep; nop;"
122 "je 2b;"
123 : :"m" (current->need_resched));
124}
125
126
127
128
129
130
131
132void cpu_idle (void)
133{
134
135 init_idle();
136 current->nice = 20;
137 current->counter = -100;
138
139 while (1) {
140 void (*idle)(void) = pm_idle;
141 if (!idle)
142 idle = default_idle;
143 while (!current->need_resched)
144 idle();
145 schedule();
146 check_pgt_cache();
147 }
148}
149
150
151
152
153
154
155
156
157static void mwait_idle (void)
158{
159 int oldval;
160
161 __sti();
162
163 oldval = xchg(¤t->need_resched, -1);
164 if (!oldval) {
165 do {
166 __monitor((void *)¤t->need_resched, 0, 0);
167 if (current->need_resched != -1)
168 break;
169 __mwait(0, 0);
170 } while (current->need_resched == -1);
171 }
172}
173
174int __init select_idle_routine(struct cpuinfo_x86 *c)
175{
176 if (cpu_has(c, X86_FEATURE_MWAIT)) {
177 printk("Monitor/Mwait feature present.\n");
178
179
180
181
182
183 if (!pm_idle) {
184 pm_idle = mwait_idle;
185 }
186 return 1;
187 }
188 pm_idle = default_idle;
189 return 1;
190}
191
192
193static int __init idle_setup (char *str)
194{
195 if (!strncmp(str, "poll", 4)) {
196 printk("using polling idle threads.\n");
197 pm_idle = poll_idle;
198 } else if (!strncmp(str, "halt", 4)) {
199 printk("using halt in idle threads.\n");
200 pm_idle = default_idle;
201 }
202
203 return 1;
204}
205
206__setup("idle=", idle_setup);
207
/*
 * Zero-initialised table handed to "lidt" by machine_restart() just
 * before it executes int3.
 */
static struct { long x; } no_idt[3];
/*
 * Reboot method used by machine_restart().  The enumerator values are the
 * option letters accepted by "reboot=", so reboot_setup() can store the
 * character directly.  The default is BOOT_KBD.
 */
static enum {
 BOOT_BIOS = 'b',
 BOOT_TRIPLE = 't',
 BOOT_KBD = 'k',
} reboot_type = BOOT_KBD;
/*
 * Value machine_restart() writes to the 16-bit word at physical address
 * 0x472: 0x1234 after "reboot=w", 0 after "reboot=c" (and by default).
 */
static int reboot_mode = 0;
215
216
217
218
219
220
221
222
223static int __init reboot_setup(char *str)
224{
225 for (;;) {
226 switch (*str) {
227 case 'w':
228 reboot_mode = 0x1234;
229 break;
230
231 case 'c':
232 reboot_mode = 0;
233 break;
234
235 case 't':
236 case 'b':
237 case 'k':
238 reboot_type = *str;
239 break;
240 }
241 if((str = strchr(str,',')) != NULL)
242 str++;
243 else
244 break;
245 }
246 return 1;
247}
248__setup("reboot=", reboot_setup);
249
250
/* Address the warm-reboot trampoline is copied to and then entered at. */
#define WARMBOOT_TRAMP 0x1000UL

/*
 * Reboot through the warm_reboot trampoline (reboot_type BOOT_BIOS).
 *
 * Copies the code between warm_reboot and warm_reboot_end to
 * WARMBOOT_TRAMP and transfers control to it with iretq, using the
 * __KERNEL_COMPAT32_CS code segment.
 */
static void reboot_warm(void)
{
	extern unsigned char warm_reboot[], warm_reboot_end[];
	printk("warm reboot\n");

	__cli();

	/*
	 * Point slot 0 of the top-level page table at level3_ident_pgt with
	 * the low three flag bits set, then flush the TLB.
	 * NOTE(review): presumably this makes the low trampoline address
	 * reachable at its own virtual address - confirm.
	 */
	init_level4_pgt[0] = __pml4(__pa(level3_ident_pgt) | 7);
	__flush_tlb_all();

	memcpy(__va(WARMBOOT_TRAMP), warm_reboot, warm_reboot_end - warm_reboot);

	/*
	 * Build an iretq frame by hand.  iretq pops RIP, CS, RFLAGS, RSP and
	 * SS in that order, so they are pushed in reverse:
	 * SS = 0, RSP = 0x2000, current RFLAGS, CS, RIP = WARMBOOT_TRAMP.
	 */
	asm volatile( " pushq $0\n"
		" pushq $0x2000\n"
		" pushfq\n"
		" pushq %[cs]\n"
		" pushq %[target]\n"
		" iretq" ::
		[cs] "i" (__KERNEL_COMPAT32_CS),
		[target] "b" (WARMBOOT_TRAMP));
}
275
/*
 * Poll port 0x64 until bit 1 (0x02) reads clear, giving up after
 * 0x10000 reads.
 * NOTE(review): 0x64 / bit 1 is conventionally the keyboard controller
 * status port and its input-buffer-full flag - confirm.
 */
static void kb_wait(void)
{
	int tries = 0;

	while (tries < 0x10000 && (inb_p(0x64) & 0x02) != 0)
		tries++;
}
284
285
286#ifdef CONFIG_SMP
287static void smp_halt(void)
288{
289 int cpuid = safe_smp_processor_id();
290 static int first_entry = 1;
291
292 if (first_entry) {
293 first_entry = 0;
294 smp_call_function((void *)machine_restart, NULL, 1, 0);
295 }
296
297 smp_stop_cpu();
298
299
300 if (cpuid != boot_cpu_id) {
301 printk("CPU %d SMP halt\n", cpuid);
302 for (;;)
303 asm("hlt");
304 }
305
306
307 while (cpu_online_map)
308 rep_nop();
309}
310#endif
311
312void machine_restart(char * __unused)
313{
314 int i;
315
316#if CONFIG_SMP
317 smp_halt();
318#endif
319 __cli();
320
321#ifndef CONFIG_SMP
322 disable_local_APIC();
323#endif
324 disable_IO_APIC();
325
326 __sti();
327
328
329 *((unsigned short *)__va(0x472)) = reboot_mode;
330
331 for (;;) {
332
333 switch (reboot_type) {
334 case BOOT_BIOS:
335 reboot_warm();
336
337 case BOOT_KBD:
338
339 for (i=0; i<100; i++) {
340 kb_wait();
341 udelay(50);
342 outb(0xfe,0x64);
343 udelay(50);
344 }
345
346 case BOOT_TRIPLE:
347
348 *((unsigned short *)__va(0x472)) = 0;
349
350 __asm__ __volatile__("lidt (%0)": :"r" (no_idt));
351 __asm__ __volatile__("int3");
352
353 reboot_type = BOOT_KBD;
354 break;
355 }
356 }
357}
358
/*
 * Halt hook.  Nothing is done here: the function returns immediately.
 */
void machine_halt(void)
{
	/* intentionally empty */
}
362
363void machine_power_off(void)
364{
365 if (pm_power_off)
366 pm_power_off();
367}
368
369extern int printk_address(unsigned long);
370
371
372void __show_regs(struct pt_regs * regs)
373{
374 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
375 unsigned int fsindex,gsindex;
376 unsigned int ds,cs,es;
377
378 printk("\n");
379 printk("Pid: %d, comm: %.20s %s\n", current->pid, current->comm, print_tainted());
380 printk("RIP: %04lx:", regs->cs & 0xffff);
381 printk_address(regs->rip);
382 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
383 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
384 regs->rax, regs->rbx, regs->rcx);
385 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
386 regs->rdx, regs->rsi, regs->rdi);
387 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
388 regs->rbp, regs->r8, regs->r9);
389 printk("R10: %016lx R11: %016lx R12: %016lx\n",
390 regs->r10, regs->r11, regs->r12);
391 printk("R13: %016lx R14: %016lx R15: %016lx\n",
392 regs->r13, regs->r14, regs->r15);
393
394 asm("movl %%ds,%0" : "=r" (ds));
395 asm("movl %%cs,%0" : "=r" (cs));
396 asm("movl %%es,%0" : "=r" (es));
397 asm("movl %%fs,%0" : "=r" (fsindex));
398 asm("movl %%gs,%0" : "=r" (gsindex));
399
400 rdmsrl(MSR_FS_BASE, fs);
401 rdmsrl(MSR_GS_BASE, gs);
402 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
403
404 asm("movq %%cr0, %0": "=r" (cr0));
405 asm("movq %%cr2, %0": "=r" (cr2));
406 asm("movq %%cr3, %0": "=r" (cr3));
407 asm("movq %%cr4, %0": "=r" (cr4));
408
409 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
410 fs,fsindex,gs,gsindex,shadowgs);
411 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
412 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
413}
414
415void show_regs(struct pt_regs * regs)
416{
417 __show_regs(regs);
418 show_trace(®s->rsp);
419}
420
421
422
423
424void release_segments(struct mm_struct *mm)
425{
426 void * ldt = mm->context.segments;
427
428
429
430
431 if (ldt) {
432 mm->context.segments = NULL;
433 clear_LDT();
434 vfree(ldt);
435 }
436}
437
438
439
440
441void exit_thread(void)
442{
443 struct task_struct *me = current;
444 if (me->thread.io_bitmap_ptr) {
445 (init_tss + smp_processor_id())->io_map_base =
446 INVALID_IO_BITMAP_OFFSET;
447 kfree(me->thread.io_bitmap_ptr);
448 me->thread.io_bitmap_ptr = NULL;
449 }
450}
451
452void flush_thread(void)
453{
454 struct task_struct *tsk = current;
455
456 memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
457
458
459
460 clear_fpu(tsk);
461 tsk->used_math = 0;
462}
463
464void release_thread(struct task_struct *dead_task)
465{
466 if (dead_task->mm) {
467 void * ldt = dead_task->mm->context.segments;
468
469
470 if (ldt) {
471 printk("WARNING: dead process %8s still has LDT? <%p>\n",
472 dead_task->comm, ldt);
473 BUG();
474 }
475 }
476}
477
478
479
480
481
482void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
483{
484 struct mm_struct * old_mm;
485 void *old_ldt, *ldt;
486
487 ldt = NULL;
488 old_mm = current->mm;
489 if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
490
491
492
493 ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
494 if (!ldt)
495 printk(KERN_WARNING "ldt allocation failed\n");
496 else
497 memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
498 }
499 new_mm->context.segments = ldt;
500 new_mm->context.cpuvalid = 0UL;
501 return;
502}
503
504int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
505 unsigned long unused,
506 struct task_struct * p, struct pt_regs * regs)
507{
508 struct pt_regs * childregs;
509 struct task_struct *me = current;
510
511 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
512
513 *childregs = *regs;
514
515 childregs->rax = 0;
516 childregs->rsp = rsp;
517 if (rsp == ~0) {
518 childregs->rsp = (unsigned long)childregs;
519 }
520
521 p->thread.rsp = (unsigned long) childregs;
522 p->thread.rsp0 = (unsigned long) (childregs+1);
523 p->thread.userrsp = current->thread.userrsp;
524
525 p->thread.rip = (unsigned long) ret_from_fork;
526
527 p->thread.fs = me->thread.fs;
528 p->thread.gs = me->thread.gs;
529
530 asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
531 asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
532 asm("movl %%es,%0" : "=m" (p->thread.es));
533 asm("movl %%ds,%0" : "=m" (p->thread.ds));
534
535 unlazy_fpu(current);
536 p->thread.i387 = current->thread.i387;
537
538 if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
539 p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
540 if (!p->thread.io_bitmap_ptr)
541 return -ENOMEM;
542 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
543 (IO_BITMAP_SIZE+1)*4);
544 }
545
546 return 0;
547}
548
549
550
551
/*
 * Load debug register number @register from the debugreg[] array of
 * @thread (a pointer to the thread structure) using set_debug().
 *
 * @thread is parenthesized so that any pointer expression may be passed.
 * @register is deliberately passed through as-is.
 * NOTE(review): set_debug() presumably needs the literal register number
 * token - confirm before parenthesizing it.
 */
#define loaddebug(thread,register) \
	set_debug((thread)->debugreg[register], register)
554
555
556
557
558
559
560
561
/*
 * Switch the architecture specific thread state from @prev_p to @next_p
 * on the current CPU: kernel stack pointer in the TSS, data segment
 * selectors, FS/GS selectors and bases, PDA fields, debug registers and
 * the I/O permission bitmap.
 *
 * Returns @prev_p.
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	struct tss_struct *tss = init_tss + smp_processor_id();

	/* NOTE(review): presumably saves prev's FPU state if it is live - confirm. */
	unlazy_fpu(prev_p);

	/* Install next's rsp0 in this CPU's TSS. */
	tss->rsp0 = next->rsp0;

	/*
	 * ES and DS: remember the outgoing selector, and reload the register
	 * only when either the old or the new selector is non-zero.
	 */
	asm volatile("movl %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("movl %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * FS: reload the selector when the live or the incoming selector is
	 * non-zero, or when prev had a base recorded in prev->fs.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=g" (fsindex));

		if (unlikely((fsindex | next->fsindex) || prev->fs)) {
			loadsegment(fs, next->fsindex);

			/* prev was running with a non-zero selector: forget its recorded base. */
			if (fsindex)
				prev->fs = 0;
		}

		/* Write the FS base MSR only when next has a non-zero base recorded. */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	/*
	 * GS: same scheme as FS, except that the selector is loaded through
	 * load_gs_index() and the base goes to MSR_KERNEL_GS_BASE.
	 */
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=g" (gsindex));
		if (unlikely((gsindex | next->gsindex) || prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * PDA: save the outgoing task's oldrsp into prev->userrsp, install
	 * next's, then publish the new current task and its kernel stack
	 * (end of next_p's THREAD_SIZE area minus PDA_STACKOFFSET).
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack, (unsigned long)next_p + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Debug registers are loaded only when next's debugreg[7] is
	 * non-zero.  Registers 4 and 5 are not loaded.
	 */
	if (unlikely(next->debugreg[7])) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);

		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	/*
	 * I/O bitmap: nothing to do unless prev or next has one.
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr) {
			/* Copy next's bitmap into the TSS and point the TSS at it. */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				 IO_BITMAP_SIZE*sizeof(u32));
			tss->io_map_base = IO_BITMAP_OFFSET;
		} else {
			/* Only prev had a bitmap: mark the TSS bitmap invalid. */
			tss->io_map_base = INVALID_IO_BITMAP_OFFSET;
		}
	}


	return prev_p;
}
675
676
677
678
679asmlinkage
680long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
681{
682 long error;
683 char * filename;
684
685 filename = getname(name);
686 error = PTR_ERR(filename);
687 if (IS_ERR(filename))
688 return error;
689 error = do_execve(filename, argv, envp, ®s);
690 if (error == 0)
691 current->ptrace &= ~PT_DTRACE;
692 putname(filename);
693 return error;
694}
695
696void set_personality_64bit(void)
697{
698
699
700
701 current->thread.flags = 0;
702}
703
704asmlinkage long sys_fork(struct pt_regs regs)
705{
706 return do_fork(SIGCHLD, regs.rsp, ®s, 0);
707}
708
709asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs regs)
710{
711 if (!newsp)
712 newsp = regs.rsp;
713 return do_fork(clone_flags, newsp, ®s, 0);
714}
715
716
717
718
719
720
721
722
723
724
725
726asmlinkage long sys_vfork(struct pt_regs regs)
727{
728 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, ®s, 0);
729}
730
731
732
733
734extern void scheduling_functions_start_here(void);
735extern void scheduling_functions_end_here(void);
736#define first_sched ((unsigned long) scheduling_functions_start_here)
737#define last_sched ((unsigned long) scheduling_functions_end_here)
738
739unsigned long get_wchan(struct task_struct *p)
740{
741 u64 fp,rip;
742 int count = 0;
743
744 if (!p || p == current || p->state==TASK_RUNNING)
745 return 0;
746 if (p->thread.rsp < (u64)p || p->thread.rsp > (u64)p + THREAD_SIZE)
747 return 0;
748 fp = *(u64 *)(p->thread.rsp);
749 do {
750 if (fp < (unsigned long)p || fp > (unsigned long)p+THREAD_SIZE)
751 return 0;
752 rip = *(u64 *)(fp+8);
753 if (rip < first_sched || rip >= last_sched)
754 return rip;
755 fp = *(u64 *)fp;
756 } while (count++ < 16);
757 return 0;
758}
759#undef last_sched
760#undef first_sched
761
762asmlinkage long sys_arch_prctl(int code, unsigned long addr)
763{
764 int ret = 0;
765 unsigned long tmp;
766
767 switch (code) {
768 case ARCH_SET_GS:
769 if (addr >= TASK_SIZE)
770 return -EPERM;
771 asm volatile("movl %0,%%gs" :: "r" (0));
772 current->thread.gsindex = 0;
773 current->thread.gs = addr;
774 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
775 break;
776 case ARCH_SET_FS:
777
778
779 if (addr >= TASK_SIZE)
780 return -EPERM;
781 asm volatile("movl %0,%%fs" :: "r" (0));
782 current->thread.fsindex = 0;
783 current->thread.fs = addr;
784 ret = checking_wrmsrl(MSR_FS_BASE, addr);
785 break;
786
787
788 case ARCH_GET_FS:
789 rdmsrl(MSR_FS_BASE, tmp);
790 ret = put_user(tmp, (unsigned long *)addr);
791 break;
792
793 case ARCH_GET_GS:
794 rdmsrl(MSR_KERNEL_GS_BASE, tmp);
795 ret = put_user(tmp, (unsigned long *)addr);
796 break;
797
798 default:
799 ret = -EINVAL;
800 break;
801 }
802 return ret;
803}
804