linux-old/arch/i386/kernel/process.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/i386/kernel/process.c
   3 *
   4 *  Copyright (C) 1995  Linus Torvalds
   5 *
   6 *  Pentium III FXSR, SSE support
   7 *      Gareth Hughes <gareth@valinux.com>, May 2000
   8 */
   9
  10/*
  11 * This file handles the architecture-dependent parts of process handling..
  12 */
  13
  14#define __KERNEL_SYSCALLS__
  15#include <stdarg.h>
  16
  17#include <linux/errno.h>
  18#include <linux/sched.h>
  19#include <linux/kernel.h>
  20#include <linux/mm.h>
  21#include <linux/smp.h>
  22#include <linux/smp_lock.h>
  23#include <linux/stddef.h>
  24#include <linux/unistd.h>
  25#include <linux/ptrace.h>
  26#include <linux/slab.h>
  27#include <linux/vmalloc.h>
  28#include <linux/user.h>
  29#include <linux/a.out.h>
  30#include <linux/interrupt.h>
  31#include <linux/config.h>
  32#include <linux/delay.h>
  33#include <linux/reboot.h>
  34#include <linux/init.h>
  35#include <linux/mc146818rtc.h>
  36
  37#include <asm/uaccess.h>
  38#include <asm/pgtable.h>
  39#include <asm/system.h>
  40#include <asm/io.h>
  41#include <asm/ldt.h>
  42#include <asm/processor.h>
  43#include <asm/i387.h>
  44#include <asm/irq.h>
  45#include <asm/desc.h>
  46#include <asm/mmu_context.h>
  47#ifdef CONFIG_MATH_EMULATION
  48#include <asm/math_emu.h>
  49#endif
  50
  51#include <linux/irq.h>
  52
/*
 * Entry point bound to the assembler symbol "ret_from_fork"; copy_thread()
 * stores its address as the initial eip of a newly created task.
 */
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

/*
 * Incremented by disable_hlt() and decremented by enable_hlt().
 * default_idle() only halts the CPU while this is zero.
 */
int hlt_counter;

/*
 * Power-management idle function, if any.  cpu_idle() falls back to
 * default_idle() while this is NULL.
 */
void (*pm_idle)(void);

/*
 * Power-off function, if any.  Called by machine_power_off() when set.
 */
void (*pm_power_off)(void);
  66
  67void disable_hlt(void)
  68{
  69        hlt_counter++;
  70}
  71
  72void enable_hlt(void)
  73{
  74        hlt_counter--;
  75}
  76
  77/*
  78 * We use this if we don't have any better
  79 * idle routine..
  80 */
  81void default_idle(void)
  82{
  83        if (current_cpu_data.hlt_works_ok && !hlt_counter) {
  84                __cli();
  85                if (!current->need_resched)
  86                        safe_halt();
  87                else
  88                        __sti();
  89        }
  90}
  91
/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 *
 * Installed as pm_idle by idle_setup() when "idle=poll" is given.
 */
static void poll_idle (void)
{
        int oldval;

        __sti();

        /*
         * Deal with another CPU just having chosen a thread to
         * run here:
         *
         * Atomically mark need_resched as -1 and fetch what was there
         * before.  NOTE(review): -1 presumably tells other CPUs that
         * this one is polling - confirm against the scheduler.
         */
        oldval = xchg(&current->need_resched, -1);

        /*
         * Nothing was pending: spin for as long as need_resched still
         * reads -1.  "rep; nop" sits between the compare and the
         * branch and leaves the flags set by cmpl untouched.
         */
        if (!oldval)
                asm volatile(
                        "2:"
                        "cmpl $-1, %0;"
                        "rep; nop;"
                        "je 2b;"
                                : :"m" (current->need_resched));
}
 117
 118/*
 119 * The idle thread. There's no useful work to be
 120 * done, so just try to conserve power and have a
 121 * low exit latency (ie sit in a loop waiting for
 122 * somebody to say that they'd like to reschedule)
 123 */
 124void cpu_idle (void)
 125{
 126        /* endless idle loop with no priority at all */
 127        init_idle();
 128        current->nice = 20;
 129        current->counter = -100;
 130
 131        while (1) {
 132                void (*idle)(void) = pm_idle;
 133                if (!idle)
 134                        idle = default_idle;
 135                while (!current->need_resched)
 136                        idle();
 137                schedule();
 138                check_pgt_cache();
 139        }
 140}
 141
 142static int __init idle_setup (char *str)
 143{
 144        if (!strncmp(str, "poll", 4)) {
 145                printk("using polling idle threads.\n");
 146                pm_idle = poll_idle;
 147        }
 148
 149        return 1;
 150}
 151
 152__setup("idle=", idle_setup);
 153
/* Zeroed IDT descriptor; machine_restart() loads it to force a triple fault. */
static long no_idt[2];
/* Value written to the BIOS flag at 0x472: 0x1234 = warm boot, 0 = cold boot. */
static int reboot_mode;
/* Non-zero: reboot by jumping through the BIOS ("reboot=b"). */
int reboot_thru_bios;

#ifdef CONFIG_SMP
/* Set by "reboot=s": perform the reboot on a specific CPU. */
int reboot_smp = 0;
/* CPU requested with "reboot=s<N>"; -1 means none was requested. */
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c)     ((c) >= '0' && (c) <= '9')
#endif
 164static int __init reboot_setup(char *str)
 165{
 166        while(1) {
 167                switch (*str) {
 168                case 'w': /* "warm" reboot (no memory testing etc) */
 169                        reboot_mode = 0x1234;
 170                        break;
 171                case 'c': /* "cold" reboot (with memory testing etc) */
 172                        reboot_mode = 0x0;
 173                        break;
 174                case 'b': /* "bios" reboot by jumping through the BIOS */
 175                        reboot_thru_bios = 1;
 176                        break;
 177                case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
 178                        reboot_thru_bios = 0;
 179                        break;
 180#ifdef CONFIG_SMP
 181                case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
 182                        reboot_smp = 1;
 183                        if (is_digit(*(str+1))) {
 184                                reboot_cpu = (int) (*(str+1) - '0');
 185                                if (is_digit(*(str+2))) 
 186                                        reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
 187                        }
 188                                /* we will leave sorting out the final value 
 189                                when we are ready to reboot, since we might not
 190                                have set up boot_cpu_id or smp_num_cpu */
 191                        break;
 192#endif
 193                }
 194                if((str = strchr(str,',')) != NULL)
 195                        str++;
 196                else
 197                        break;
 198        }
 199        return 1;
 200}
 201
 202__setup("reboot=", reboot_setup);
 203
 204/* The following code and data reboots the machine by switching to real
 205   mode and jumping to the BIOS reset entry point, as if the CPU has
 206   really been reset.  The previous version asked the keyboard
 207   controller to pulse the CPU reset line, which is more thorough, but
 208   doesn't work with at least one type of 486 motherboard.  It is easy
 209   to stop this code working; hence the copious comments. */
 210
/*
 * Temporary GDT loaded by machine_real_restart().  Entry 1 (selector
 * 0x08) is the code segment it far-jumps through; entry 2 (selector
 * 0x10) is loaded into all the data segment registers.
 */
static unsigned long long
real_mode_gdt_entries [3] =
{
        0x0000000000000000ULL,  /* Null descriptor */
        0x00009a000000ffffULL,  /* 16-bit real-mode 64k code at 0x00000000 */
        0x000092000100ffffULL   /* 16-bit real-mode 64k data at 0x00000100 */
};
 218
/*
 * Packed size/base pairs used as the memory operands of the lgdt and
 * lidt instructions in machine_real_restart().  The IDT operand has
 * size 0x3ff and base 0.
 */
static struct
{
        unsigned short       size __attribute__ ((packed));
        unsigned long long * base __attribute__ ((packed));
}
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
real_mode_idt = { 0x3ff, 0 };
 226
 227/* This is 16-bit protected mode code to disable paging and the cache,
 228   switch to real mode and jump to the BIOS reset code.
 229
 230   The instruction that switches to real mode by writing to CR0 must be
 231   followed immediately by a far jump instruction, which set CS to a
 232   valid value for real mode, and flushes the prefetch queue to avoid
 233   running instructions that have already been decoded in protected
 234   mode.
 235
 236   Clears all the flags except ET, especially PG (paging), PE
 237   (protected-mode enable) and TS (task switch for coprocessor state
 238   save).  Flushes the TLB after paging has been disabled.  Sets CD and
 239   NW, to disable the cache on a 486, and invalidates the cache.  This
 240   is more like the state of a 486 after reset.  I don't know if
 241   something else should be done for other chips.
 242
 243   More could be done here to set up the registers as if a CPU reset had
 244   occurred; hopefully real BIOSs don't assume much. */
 245
/*
 * Hand-assembled 16-bit code, copied into low memory by
 * machine_real_restart() and entered through selector 0x08.  The code
 * passed to machine_real_restart() is copied directly after it, so
 * execution falls through into that code.
 */
static unsigned char real_mode_switch [] =
{
        0x66, 0x0f, 0x20, 0xc0,                 /*    movl  %cr0,%eax        */
        0x66, 0x83, 0xe0, 0x11,                 /*    andl  $0x00000011,%eax */
        0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,     /*    orl   $0x60000000,%eax */
        0x66, 0x0f, 0x22, 0xc0,                 /*    movl  %eax,%cr0        */
        0x66, 0x0f, 0x22, 0xd8,                 /*    movl  %eax,%cr3        */
        0x66, 0x0f, 0x20, 0xc3,                 /*    movl  %cr0,%ebx        */
        0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60,       /*    andl  $0x60000000,%ebx */
        0x74, 0x02,                             /*    jz    f                */
        0x0f, 0x08,                             /*    invd                   */
        0x24, 0x10,                             /* f: andb  $0x10,al         */
        0x66, 0x0f, 0x22, 0xc0                  /*    movl  %eax,%cr0        */
};
/*
 * Real-mode far jump to ffff:0000.  machine_restart() passes this to
 * machine_real_restart() as the code to run after the mode switch.
 */
static unsigned char jump_to_bios [] =
{
        0xea, 0x00, 0x00, 0xff, 0xff            /*    ljmp  $0xffff,$0x0000  */
};
 264
 265static inline void kb_wait(void)
 266{
 267        int i;
 268
 269        for (i=0; i<0x10000; i++)
 270                if ((inb_p(0x64) & 0x02) == 0)
 271                        break;
 272}
 273
/*
 * Switch to real mode and then execute the code
 * specified by the code and length parameters.
 * We assume that length will always be less than 100!
 *
 * `code' is copied to the 100 bytes just below 0x1000, directly after
 * a copy of real_mode_switch[], so nothing here checks `length'.
 * The function ends in a far jump and is not expected to return.
 */
void machine_real_restart(unsigned char *code, int length)
{
        unsigned long flags;

        cli();

        /* Write zero to CMOS register number 0x0f, which the BIOS POST
           routine will recognize as telling it to do a proper reboot.  (Well
           that's what this book in front of me says -- it may only apply to
           the Phoenix BIOS though, it's not clear).  At the same time,
           disable NMIs by setting the top bit in the CMOS address register,
           as we're about to do peculiar things to the CPU.  I'm not sure if
           `outb_p' is needed instead of just `outb'.  Use it to be on the
           safe side.  (Yes, CMOS_WRITE does outb_p's. -  Paul G.)
         */

        spin_lock_irqsave(&rtc_lock, flags);
        CMOS_WRITE(0x00, 0x8f);
        spin_unlock_irqrestore(&rtc_lock, flags);

        /* Remap the kernel at virtual address zero, as well as offset zero
           from the kernel segment.  This assumes the kernel segment starts at
           virtual address PAGE_OFFSET. */

        memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);

        /* Make sure the first page is mapped to the start of physical memory.
           It is normally not mapped, to trap kernel NULL pointer dereferences. */

        pg0[0] = _PAGE_RW | _PAGE_PRESENT;

        /*
         * Use `swapper_pg_dir' as our page directory.
         */
        load_cr3(swapper_pg_dir);

        /* Write 0x1234 to absolute memory location 0x472.  The BIOS reads
           this on booting to tell it to "Bypass memory test (also warm
           boot)".  This seems like a fairly standard thing that gets set by
           REBOOT.COM programs, and the previous reset routine did this
           too.  (The value actually written is reboot_mode, which is
           0x1234 only when a warm reboot was requested.) */

        *((unsigned short *)0x472) = reboot_mode;

        /* For the switch to real mode, copy some code to low memory.  It has
           to be in the first 64k because it is running in 16-bit mode, and it
           has to have the same physical and virtual address, because it turns
           off paging.  Copy it near the end of the first page, out of the way
           of BIOS variables. */

        memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
                real_mode_switch, sizeof (real_mode_switch));
        memcpy ((void *) (0x1000 - 100), code, length);

        /* Set up the IDT for real mode. */

        __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));

        /* Set up a GDT from which we can load segment descriptors for real
           mode.  The GDT is not used in real mode; it is just needed here to
           prepare the descriptors. */

        __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));

        /* Load the data segment registers, and thus the descriptors ready for
           real mode.  The base address of each segment is 0x100, 16 times the
           selector value being loaded here.  This is so that the segment
           registers don't have to be reloaded after switching to real mode:
           the values are consistent for real mode operation already. */

        __asm__ __volatile__ ("movl $0x0010,%%eax\n"
                                "\tmovl %%eax,%%ds\n"
                                "\tmovl %%eax,%%es\n"
                                "\tmovl %%eax,%%fs\n"
                                "\tmovl %%eax,%%gs\n"
                                "\tmovl %%eax,%%ss" : : : "eax");

        /* Jump to the 16-bit code that we copied earlier.  It disables paging
           and the cache, switches to real mode, and jumps to the BIOS reset
           entry point. */

        __asm__ __volatile__ ("ljmp $0x0008,%0"
                                :
                                : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
}
 365
 366void machine_restart(char * __unused)
 367{
 368#if CONFIG_SMP
 369        int cpuid;
 370        
 371        cpuid = GET_APIC_ID(apic_read(APIC_ID));
 372
 373        if (reboot_smp) {
 374
 375                /* check to see if reboot_cpu is valid 
 376                   if its not, default to the BSP */
 377                if ((reboot_cpu == -1) ||  
 378                      (reboot_cpu > (NR_CPUS -1))  || 
 379                      !(phys_cpu_present_map & (1<<cpuid))) 
 380                        reboot_cpu = boot_cpu_physical_apicid;
 381
 382                reboot_smp = 0;  /* use this as a flag to only go through this once*/
 383                /* re-run this function on the other CPUs
 384                   it will fall though this section since we have 
 385                   cleared reboot_smp, and do the reboot if it is the
 386                   correct CPU, otherwise it halts. */
 387                if (reboot_cpu != cpuid)
 388                        smp_call_function((void *)machine_restart , NULL, 1, 0);
 389        }
 390
 391        /* if reboot_cpu is still -1, then we want a tradional reboot, 
 392           and if we are not running on the reboot_cpu,, halt */
 393        if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
 394                for (;;)
 395                __asm__ __volatile__ ("hlt");
 396        }
 397        /*
 398         * Stop all CPUs and turn off local APICs and the IO-APIC, so
 399         * other OSs see a clean IRQ state.
 400         */
 401        smp_send_stop();
 402        disable_IO_APIC();
 403#endif
 404
 405        if(!reboot_thru_bios) {
 406                /* rebooting needs to touch the page at absolute addr 0 */
 407                *((unsigned short *)__va(0x472)) = reboot_mode;
 408                for (;;) {
 409                        int i;
 410                        for (i=0; i<100; i++) {
 411                                kb_wait();
 412                                udelay(50);
 413                                outb(0xfe,0x64);         /* pulse reset low */
 414                                udelay(50);
 415                        }
 416                        /* That didn't work - force a triple fault.. */
 417                        __asm__ __volatile__("lidt %0": :"m" (no_idt));
 418                        __asm__ __volatile__("int3");
 419                }
 420        }
 421
 422        machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
 423}
 424
/*
 * Halt hook: intentionally empty in this file, so calling it has no
 * effect here.
 */
void machine_halt(void)
{
}
 428
 429void machine_power_off(void)
 430{
 431        if (pm_power_off)
 432                pm_power_off();
 433}
 434
extern void show_trace(unsigned long* esp);

/*
 * Print the register state in `regs', the current task's pid and
 * command name, and the control registers CR0, CR2, CR3 and CR4, then
 * hand &regs->esp to show_trace().
 */
void show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;

        printk("\n");
        printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
        printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
        /* ESP/SS are printed only when the low two bits of CS are non-zero */
        if (regs->xcs & 3)
                printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
        printk(" EFLAGS: %08lx    %s\n",regs->eflags, print_tainted());
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->eax,regs->ebx,regs->ecx,regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
                regs->esi, regs->edi, regs->ebp);
        printk(" DS: %04x ES: %04x\n",
                0xffff & regs->xds,0xffff & regs->xes);

        __asm__("movl %%cr0, %0": "=r" (cr0));
        __asm__("movl %%cr2, %0": "=r" (cr2));
        __asm__("movl %%cr3, %0": "=r" (cr3));
        /*
         * This could fault if %cr4 does not exist.  The __ex_table
         * entry resumes execution at label 2 after a fault at label 1;
         * the "0" (0) input preloads the output register with zero, so
         * cr4 then reads as 0.
         */
        __asm__("1: movl %%cr4, %0              \n"
                "2:                             \n"
                ".section __ex_table,\"a\"      \n"
                ".long 1b,2b                    \n"
                ".previous                      \n"
                : "=r" (cr4): "0" (0));
        printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
        show_trace(&regs->esp);
}
 467
 468/*
 469 * No need to lock the MM as we are the last user
 470 */
 471void release_segments(struct mm_struct *mm)
 472{
 473        void * ldt = mm->context.segments;
 474
 475        /*
 476         * free the LDT
 477         */
 478        if (ldt) {
 479                mm->context.segments = NULL;
 480                clear_LDT();
 481                vfree(ldt);
 482        }
 483}
 484
/*
 * Create a kernel thread
 *
 * Issues the clone system call (int $0x80 with eax = __NR_clone and
 * ebx = flags | CLONE_VM).  In the child, fn(arg) is called and then
 * the exit system call is issued.  In the parent, the value left in
 * eax by the clone call is returned.
 */
int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
        long retval, d0;

        __asm__ __volatile__(
                /* remember the stack pointer so the two sides can be told apart */
                "movl %%esp,%%esi\n\t"
                "int $0x80\n\t"         /* Linux/i386 system call */
                "cmpl %%esp,%%esi\n\t"  /* child or parent? */
                "je 1f\n\t"             /* parent - jump */
                /* Load the argument into eax, and push it.  That way, it does
                 * not matter whether the called function is compiled with
                 * -mregparm or not.  */
                "movl %4,%%eax\n\t"
                "pushl %%eax\n\t"
                "call *%5\n\t"          /* call fn */
                "movl %3,%0\n\t"        /* exit */
                "int $0x80\n"
                "1:\t"
                :"=&a" (retval), "=&S" (d0)
                :"0" (__NR_clone), "i" (__NR_exit),
                 "r" (arg), "r" (fn),
                 "b" (flags | CLONE_VM)
                : "memory");

        return retval;
}
 514
/*
 * Free current thread data structures etc..
 *
 * Empty in this file: it performs no per-thread cleanup here.
 */
void exit_thread(void)
{
        /* nothing to do ... */
}
 522
 523void flush_thread(void)
 524{
 525        struct task_struct *tsk = current;
 526
 527        memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
 528        /*
 529         * Forget coprocessor state..
 530         */
 531        clear_fpu(tsk);
 532        tsk->used_math = 0;
 533}
 534
 535void release_thread(struct task_struct *dead_task)
 536{
 537        if (dead_task->mm) {
 538                void * ldt = dead_task->mm->context.segments;
 539
 540                // temporary debugging check
 541                if (ldt) {
 542                        printk("WARNING: dead process %8s still has LDT? <%p>\n",
 543                                        dead_task->comm, ldt);
 544                        BUG();
 545                }
 546        }
 547
 548        release_x86_irqs(dead_task);
 549}
 550
 551/*
 552 * we do not have to muck with descriptors here, that is
 553 * done in switch_mm() as needed.
 554 */
 555void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
 556{
 557        struct mm_struct * old_mm;
 558        void *old_ldt, *ldt;
 559
 560        ldt = NULL;
 561        old_mm = current->mm;
 562        if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
 563                /*
 564                 * Completely new LDT, we initialize it from the parent:
 565                 */
 566                ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
 567                if (!ldt)
 568                        printk(KERN_WARNING "ldt allocation failed\n");
 569                else
 570                        memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
 571        }
 572        new_mm->context.segments = ldt;
 573        new_mm->context.cpuvalid = ~0UL;        /* valid on all CPU's - they can't have stale data */
 574}
 575
/*
 * Save a segment.
 *
 * Stores the current value of segment register `seg' (e.g. fs, gs)
 * into the lvalue `value', which is written through an int-sized
 * memory operand.
 */
#define savesegment(seg,value) \
        asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
 581
 582int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 583        unsigned long unused,
 584        struct task_struct * p, struct pt_regs * regs)
 585{
 586        struct pt_regs * childregs;
 587
 588        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
 589        struct_cpy(childregs, regs);
 590        childregs->eax = 0;
 591        childregs->esp = esp;
 592
 593        p->thread.esp = (unsigned long) childregs;
 594        p->thread.esp0 = (unsigned long) (childregs+1);
 595
 596        p->thread.eip = (unsigned long) ret_from_fork;
 597
 598        savesegment(fs,p->thread.fs);
 599        savesegment(gs,p->thread.gs);
 600
 601        unlazy_fpu(current);
 602        struct_cpy(&p->thread.i387, &current->thread.i387);
 603
 604        return 0;
 605}
 606
 607/*
 608 * fill in the user structure for a core dump..
 609 */
 610void dump_thread(struct pt_regs * regs, struct user * dump)
 611{
 612        int i;
 613
 614/* changed the size calculations - should hopefully work better. lbt */
 615        dump->magic = CMAGIC;
 616        dump->start_code = 0;
 617        dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
 618        dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
 619        dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
 620        dump->u_dsize -= dump->u_tsize;
 621        dump->u_ssize = 0;
 622        for (i = 0; i < 8; i++)
 623                dump->u_debugreg[i] = current->thread.debugreg[i];  
 624
 625        if (dump->start_stack < TASK_SIZE)
 626                dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
 627
 628        dump->regs.ebx = regs->ebx;
 629        dump->regs.ecx = regs->ecx;
 630        dump->regs.edx = regs->edx;
 631        dump->regs.esi = regs->esi;
 632        dump->regs.edi = regs->edi;
 633        dump->regs.ebp = regs->ebp;
 634        dump->regs.eax = regs->eax;
 635        dump->regs.ds = regs->xds;
 636        dump->regs.es = regs->xes;
 637        savesegment(fs,dump->regs.fs);
 638        savesegment(gs,dump->regs.gs);
 639        dump->regs.orig_eax = regs->orig_eax;
 640        dump->regs.eip = regs->eip;
 641        dump->regs.cs = regs->xcs;
 642        dump->regs.eflags = regs->eflags;
 643        dump->regs.esp = regs->esp;
 644        dump->regs.ss = regs->xss;
 645
 646        dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 647}
 648
/*
 * This special macro can be used to load a debugging register
 *
 * `thread' is a pointer whose debugreg[] array supplies the value;
 * `register' must be a literal digit, because it is pasted into the
 * register name (%db0 .. %db7) as well as used as the array index.
 */
#define loaddebug(thread,register) \
                __asm__("movl %0,%%db" #register  \
                        : /* no output */ \
                        :"r" (thread->debugreg[register]))
 656
 657/*
 658 *      switch_to(x,yn) should switch tasks from x to y.
 659 *
 660 * We fsave/fwait so that an exception goes off at the right time
 661 * (as a call from the fsave or fwait in effect) rather than to
 662 * the wrong process. Lazy FP saving no longer makes any sense
 663 * with modern CPU's, and this simplifies a lot of things (SMP
 664 * and UP become the same).
 665 *
 666 * NOTE! We used to use the x86 hardware context switching. The
 667 * reason for not using it any more becomes apparent when you
 668 * try to recover gracefully from saved state that is no longer
 669 * valid (stale segment register values in particular). With the
 670 * hardware task-switch, there is no way to fix up bad state in
 671 * a reasonable manner.
 672 *
 673 * The fact that Intel documents the hardware task-switching to
 674 * be slow is a fairly red herring - this code is not noticeably
 675 * faster. However, there _is_ some room for improvement here,
 676 * so the performance issues may eventually be a valid point.
 677 * More important, however, is the fact that this allows us much
 678 * more flexibility.
 679 */
 680void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 681{
 682        struct thread_struct *prev = &prev_p->thread,
 683                                 *next = &next_p->thread;
 684        struct tss_struct *tss = init_tss + smp_processor_id();
 685
 686        unlazy_fpu(prev_p);
 687
 688        /*
 689         * Reload esp0, LDT and the page table pointer:
 690         */
 691        tss->esp0 = next->esp0;
 692
 693        /*
 694         * Save away %fs and %gs. No need to save %es and %ds, as
 695         * those are always kernel segments while inside the kernel.
 696         */
 697        asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
 698        asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
 699
 700        /*
 701         * Restore %fs and %gs.
 702         */
 703        loadsegment(fs, next->fs);
 704        loadsegment(gs, next->gs);
 705
 706        /*
 707         * Now maybe reload the debug registers
 708         */
 709        if (next->debugreg[7]){
 710                loaddebug(next, 0);
 711                loaddebug(next, 1);
 712                loaddebug(next, 2);
 713                loaddebug(next, 3);
 714                /* no 4 and 5 */
 715                loaddebug(next, 6);
 716                loaddebug(next, 7);
 717        }
 718
 719        if (prev->ioperm || next->ioperm) {
 720                if (next->ioperm) {
 721                        /*
 722                         * 4 cachelines copy ... not good, but not that
 723                         * bad either. Anyone got something better?
 724                         * This only affects processes which use ioperm().
 725                         * [Putting the TSSs into 4k-tlb mapped regions
 726                         * and playing VM tricks to switch the IO bitmap
 727                         * is not really acceptable.]
 728                         */
 729                        memcpy(tss->io_bitmap, next->io_bitmap,
 730                                 IO_BITMAP_BYTES);
 731                        tss->bitmap = IO_BITMAP_OFFSET;
 732                } else
 733                        /*
 734                         * a bitmap offset pointing outside of the TSS limit
 735                         * causes a nicely controllable SIGSEGV if a process
 736                         * tries to use a port IO instruction. The first
 737                         * sys_ioperm() call sets up the bitmap properly.
 738                         */
 739                        tss->bitmap = INVALID_IO_BITMAP_OFFSET;
 740        }
 741}
 742
 743asmlinkage int sys_fork(struct pt_regs regs)
 744{
 745        return do_fork(SIGCHLD, regs.esp, &regs, 0);
 746}
 747
 748asmlinkage int sys_clone(struct pt_regs regs)
 749{
 750        unsigned long clone_flags;
 751        unsigned long newsp;
 752
 753        clone_flags = regs.ebx;
 754        newsp = regs.ecx;
 755        if (!newsp)
 756                newsp = regs.esp;
 757        return do_fork(clone_flags, newsp, &regs, 0);
 758}
 759
 760/*
 761 * This is trivial, and on the face of it looks like it
 762 * could equally well be done in user mode.
 763 *
 764 * Not so, for quite unobvious reasons - register pressure.
 765 * In user mode vfork() cannot have a stack frame, and if
 766 * done by calling the "clone()" system call directly, you
 767 * do not have enough call-clobbered registers to hold all
 768 * the information you need.
 769 */
 770asmlinkage int sys_vfork(struct pt_regs regs)
 771{
 772        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0);
 773}
 774
 775/*
 776 * sys_execve() executes a new program.
 777 */
 778asmlinkage int sys_execve(struct pt_regs regs)
 779{
 780        int error;
 781        char * filename;
 782
 783        filename = getname((char *) regs.ebx);
 784        error = PTR_ERR(filename);
 785        if (IS_ERR(filename))
 786                goto out;
 787        error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
 788        if (error == 0)
 789                current->ptrace &= ~PT_DTRACE;
 790        putname(filename);
 791out:
 792        return error;
 793}
 794
 795/*
 796 * These bracket the sleeping functions..
 797 */
 798extern void scheduling_functions_start_here(void);
 799extern void scheduling_functions_end_here(void);
 800#define first_sched     ((unsigned long) scheduling_functions_start_here)
 801#define last_sched      ((unsigned long) scheduling_functions_end_here)
 802
 803unsigned long get_wchan(struct task_struct *p)
 804{
 805        unsigned long ebp, esp, eip;
 806        unsigned long stack_page;
 807        int count = 0;
 808        if (!p || p == current || p->state == TASK_RUNNING)
 809                return 0;
 810        stack_page = (unsigned long)p;
 811        esp = p->thread.esp;
 812        if (!stack_page || esp < stack_page || esp > 8188+stack_page)
 813                return 0;
 814        /* include/asm-i386/system.h:switch_to() pushes ebp last. */
 815        ebp = *(unsigned long *) esp;
 816        do {
 817                if (ebp < stack_page || ebp > 8184+stack_page)
 818                        return 0;
 819                eip = *(unsigned long *) (ebp+4);
 820                if (eip < first_sched || eip >= last_sched)
 821                        return eip;
 822                ebp = *(unsigned long *) ebp;
 823        } while (count++ < 16);
 824        return 0;
 825}
 826#undef last_sched
 827#undef first_sched
 828
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.