linux/arch/x86/kernel/process_64.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1995  Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *      Gareth Hughes <gareth@valinux.com>, May 2000
   6 *
   7 *  X86-64 port
   8 *      Andi Kleen.
   9 *
  10 *      CPU hotplug support - ashok.raj@intel.com
  11 */
  12
  13/*
  14 * This file handles the architecture-dependent parts of process handling..
  15 */
  16
  17#include <stdarg.h>
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/fs.h>
  23#include <linux/kernel.h>
  24#include <linux/mm.h>
  25#include <linux/elfcore.h>
  26#include <linux/smp.h>
  27#include <linux/slab.h>
  28#include <linux/user.h>
  29#include <linux/interrupt.h>
  30#include <linux/utsname.h>
  31#include <linux/delay.h>
  32#include <linux/module.h>
  33#include <linux/ptrace.h>
  34#include <linux/random.h>
  35#include <linux/notifier.h>
  36#include <linux/kprobes.h>
  37#include <linux/kdebug.h>
  38#include <linux/tick.h>
  39
  40#include <asm/uaccess.h>
  41#include <asm/pgtable.h>
  42#include <asm/system.h>
  43#include <asm/io.h>
  44#include <asm/processor.h>
  45#include <asm/i387.h>
  46#include <asm/mmu_context.h>
  47#include <asm/pda.h>
  48#include <asm/prctl.h>
  49#include <asm/desc.h>
  50#include <asm/proto.h>
  51#include <asm/ia32.h>
  52#include <asm/idle.h>
  53
  54asmlinkage extern void ret_from_fork(void);
  55
  56unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
  57
  58unsigned long boot_option_idle_override = 0;
  59EXPORT_SYMBOL(boot_option_idle_override);
  60
  61/*
  62 * Powermanagement idle function, if any..
  63 */
  64void (*pm_idle)(void);
  65EXPORT_SYMBOL(pm_idle);
  66
  67static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  68
  69void idle_notifier_register(struct notifier_block *n)
  70{
  71        atomic_notifier_chain_register(&idle_notifier, n);
  72}
  73
  74void enter_idle(void)
  75{
  76        write_pda(isidle, 1);
  77        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
  78}
  79
  80static void __exit_idle(void)
  81{
  82        if (test_and_clear_bit_pda(0, isidle) == 0)
  83                return;
  84        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
  85}
  86
  87/* Called from interrupts to signify idle end */
  88void exit_idle(void)
  89{
  90        /* idle loop has pid 0 */
  91        if (current->pid)
  92                return;
  93        __exit_idle();
  94}
  95
  96/*
  97 * We use this if we don't have any better
  98 * idle routine..
  99 */
 100void default_idle(void)
 101{
 102        current_thread_info()->status &= ~TS_POLLING;
 103        /*
 104         * TS_POLLING-cleared state must be visible before we
 105         * test NEED_RESCHED:
 106         */
 107        smp_mb();
 108        local_irq_disable();
 109        if (!need_resched()) {
 110                ktime_t t0, t1;
 111                u64 t0n, t1n;
 112
 113                t0 = ktime_get();
 114                t0n = ktime_to_ns(t0);
 115                safe_halt();    /* enables interrupts racelessly */
 116                local_irq_disable();
 117                t1 = ktime_get();
 118                t1n = ktime_to_ns(t1);
 119                sched_clock_idle_wakeup_event(t1n - t0n);
 120        }
 121        local_irq_enable();
 122        current_thread_info()->status |= TS_POLLING;
 123}
 124
 125/*
 126 * On SMP it's slightly faster (but much more power-consuming!)
 127 * to poll the ->need_resched flag instead of waiting for the
 128 * cross-CPU IPI to arrive. Use this option with caution.
 129 */
 130static void poll_idle(void)
 131{
 132        local_irq_enable();
 133        cpu_relax();
 134}
 135
 136#ifdef CONFIG_HOTPLUG_CPU
 137DECLARE_PER_CPU(int, cpu_state);
 138
 139#include <asm/nmi.h>
 140/* We halt the CPU with physical CPU hotplug */
 141static inline void play_dead(void)
 142{
 143        idle_task_exit();
 144        wbinvd();
 145        mb();
 146        /* Ack it */
 147        __get_cpu_var(cpu_state) = CPU_DEAD;
 148
 149        local_irq_disable();
 150        while (1)
 151                halt();
 152}
 153#else
 154static inline void play_dead(void)
 155{
 156        BUG();
 157}
 158#endif /* CONFIG_HOTPLUG_CPU */
 159
 160/*
 161 * The idle thread. There's no useful work to be
 162 * done, so just try to conserve power and have a
 163 * low exit latency (ie sit in a loop waiting for
 164 * somebody to say that they'd like to reschedule)
 165 */
 166void cpu_idle(void)
 167{
 168        current_thread_info()->status |= TS_POLLING;
 169        /* endless idle loop with no priority at all */
 170        while (1) {
 171                tick_nohz_stop_sched_tick();
 172                while (!need_resched()) {
 173                        void (*idle)(void);
 174
 175                        rmb();
 176                        idle = pm_idle;
 177                        if (!idle)
 178                                idle = default_idle;
 179                        if (cpu_is_offline(smp_processor_id()))
 180                                play_dead();
 181                        /*
 182                         * Idle routines should keep interrupts disabled
 183                         * from here on, until they go to idle.
 184                         * Otherwise, idle callbacks can misfire.
 185                         */
 186                        local_irq_disable();
 187                        enter_idle();
 188                        idle();
 189                        /* In many cases the interrupt that ended idle
 190                           has already called exit_idle. But some idle
 191                           loops can be woken up without interrupt. */
 192                        __exit_idle();
 193                }
 194
 195                tick_nohz_restart_sched_tick();
 196                preempt_enable_no_resched();
 197                schedule();
 198                preempt_disable();
 199        }
 200}
 201
 202static void do_nothing(void *unused)
 203{
 204}
 205
 206/*
 207 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 208 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 209 * handler on SMP systems.
 210 *
 211 * Caller must have changed pm_idle to the new value before the call. Old
 212 * pm_idle value will not be used by any CPU after the return of this function.
 213 */
 214void cpu_idle_wait(void)
 215{
 216        smp_mb();
 217        /* kick all the CPUs so that they exit out of pm_idle */
 218        smp_call_function(do_nothing, NULL, 0, 1);
 219}
 220EXPORT_SYMBOL_GPL(cpu_idle_wait);
 221
 222/*
 223 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 224 * which can obviate IPI to trigger checking of need_resched.
 225 * We execute MONITOR against need_resched and enter optimized wait state
 226 * through MWAIT. Whenever someone changes need_resched, we would be woken
 227 * up from MWAIT (without an IPI).
 228 *
 229 * New with Core Duo processors, MWAIT can take some hints based on CPU
 230 * capability.
 231 */
 232void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 233{
 234        if (!need_resched()) {
 235                __monitor((void *)&current_thread_info()->flags, 0, 0);
 236                smp_mb();
 237                if (!need_resched())
 238                        __mwait(ax, cx);
 239        }
 240}
 241
 242/* Default MONITOR/MWAIT with no hints, used for default C1 state */
 243static void mwait_idle(void)
 244{
 245        if (!need_resched()) {
 246                __monitor((void *)&current_thread_info()->flags, 0, 0);
 247                smp_mb();
 248                if (!need_resched())
 249                        __sti_mwait(0, 0);
 250                else
 251                        local_irq_enable();
 252        } else {
 253                local_irq_enable();
 254        }
 255}
 256
 257
 258static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 259{
 260        if (force_mwait)
 261                return 1;
 262        /* Any C1 states supported? */
 263        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
 264}
 265
 266void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 267{
 268        static int selected;
 269
 270        if (selected)
 271                return;
 272#ifdef CONFIG_X86_SMP
 273        if (pm_idle == poll_idle && smp_num_siblings > 1) {
 274                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
 275                        " performance may degrade.\n");
 276        }
 277#endif
 278        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 279                /*
 280                 * Skip, if setup has overridden idle.
 281                 * One CPU supports mwait => All CPUs supports mwait
 282                 */
 283                if (!pm_idle) {
 284                        printk(KERN_INFO "using mwait in idle threads.\n");
 285                        pm_idle = mwait_idle;
 286                }
 287        }
 288        selected = 1;
 289}
 290
 291static int __init idle_setup(char *str)
 292{
 293        if (!strcmp(str, "poll")) {
 294                printk("using polling idle threads.\n");
 295                pm_idle = poll_idle;
 296        } else if (!strcmp(str, "mwait"))
 297                force_mwait = 1;
 298        else
 299                return -1;
 300
 301        boot_option_idle_override = 1;
 302        return 0;
 303}
 304early_param("idle", idle_setup);
 305
 306/* Prints also some state that isn't saved in the pt_regs */
 307void __show_regs(struct pt_regs * regs)
 308{
 309        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 310        unsigned long d0, d1, d2, d3, d6, d7;
 311        unsigned int fsindex, gsindex;
 312        unsigned int ds, cs, es;
 313
 314        printk("\n");
 315        print_modules();
 316        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 317                current->pid, current->comm, print_tainted(),
 318                init_utsname()->release,
 319                (int)strcspn(init_utsname()->version, " "),
 320                init_utsname()->version);
 321        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 322        printk_address(regs->ip, 1);
 323        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
 324                regs->flags);
 325        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 326               regs->ax, regs->bx, regs->cx);
 327        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
 328               regs->dx, regs->si, regs->di);
 329        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
 330               regs->bp, regs->r8, regs->r9);
 331        printk("R10: %016lx R11: %016lx R12: %016lx\n",
 332               regs->r10, regs->r11, regs->r12); 
 333        printk("R13: %016lx R14: %016lx R15: %016lx\n",
 334               regs->r13, regs->r14, regs->r15); 
 335
 336        asm("movl %%ds,%0" : "=r" (ds)); 
 337        asm("movl %%cs,%0" : "=r" (cs)); 
 338        asm("movl %%es,%0" : "=r" (es)); 
 339        asm("movl %%fs,%0" : "=r" (fsindex));
 340        asm("movl %%gs,%0" : "=r" (gsindex));
 341
 342        rdmsrl(MSR_FS_BASE, fs);
 343        rdmsrl(MSR_GS_BASE, gs); 
 344        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
 345
 346        cr0 = read_cr0();
 347        cr2 = read_cr2();
 348        cr3 = read_cr3();
 349        cr4 = read_cr4();
 350
 351        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
 352               fs,fsindex,gs,gsindex,shadowgs); 
 353        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
 354        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
 355
 356        get_debugreg(d0, 0);
 357        get_debugreg(d1, 1);
 358        get_debugreg(d2, 2);
 359        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 360        get_debugreg(d3, 3);
 361        get_debugreg(d6, 6);
 362        get_debugreg(d7, 7);
 363        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 364}
 365
 366void show_regs(struct pt_regs *regs)
 367{
 368        printk("CPU %d:", smp_processor_id());
 369        __show_regs(regs);
 370        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 371}
 372
 373/*
 374 * Free current thread data structures etc..
 375 */
 376void exit_thread(void)
 377{
 378        struct task_struct *me = current;
 379        struct thread_struct *t = &me->thread;
 380
 381        if (me->thread.io_bitmap_ptr) {
 382                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 383
 384                kfree(t->io_bitmap_ptr);
 385                t->io_bitmap_ptr = NULL;
 386                clear_thread_flag(TIF_IO_BITMAP);
 387                /*
 388                 * Careful, clear this in the TSS too:
 389                 */
 390                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
 391                t->io_bitmap_max = 0;
 392                put_cpu();
 393        }
 394}
 395
 396void flush_thread(void)
 397{
 398        struct task_struct *tsk = current;
 399
 400        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
 401                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
 402                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
 403                        clear_tsk_thread_flag(tsk, TIF_IA32);
 404                } else {
 405                        set_tsk_thread_flag(tsk, TIF_IA32);
 406                        current_thread_info()->status |= TS_COMPAT;
 407                }
 408        }
 409        clear_tsk_thread_flag(tsk, TIF_DEBUG);
 410
 411        tsk->thread.debugreg0 = 0;
 412        tsk->thread.debugreg1 = 0;
 413        tsk->thread.debugreg2 = 0;
 414        tsk->thread.debugreg3 = 0;
 415        tsk->thread.debugreg6 = 0;
 416        tsk->thread.debugreg7 = 0;
 417        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 418        /*
 419         * Forget coprocessor state..
 420         */
 421        clear_fpu(tsk);
 422        clear_used_math();
 423}
 424
 425void release_thread(struct task_struct *dead_task)
 426{
 427        if (dead_task->mm) {
 428                if (dead_task->mm->context.size) {
 429                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
 430                                        dead_task->comm,
 431                                        dead_task->mm->context.ldt,
 432                                        dead_task->mm->context.size);
 433                        BUG();
 434                }
 435        }
 436}
 437
 438static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 439{
 440        struct user_desc ud = {
 441                .base_addr = addr,
 442                .limit = 0xfffff,
 443                .seg_32bit = 1,
 444                .limit_in_pages = 1,
 445                .useable = 1,
 446        };
 447        struct desc_struct *desc = t->thread.tls_array;
 448        desc += tls;
 449        fill_ldt(desc, &ud);
 450}
 451
 452static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 453{
 454        return get_desc_base(&t->thread.tls_array[tls]);
 455}
 456
 457/*
 458 * This gets called before we allocate a new thread and copy
 459 * the current task into it.
 460 */
 461void prepare_to_copy(struct task_struct *tsk)
 462{
 463        unlazy_fpu(tsk);
 464}
 465
 466int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 467                unsigned long unused,
 468        struct task_struct * p, struct pt_regs * regs)
 469{
 470        int err;
 471        struct pt_regs * childregs;
 472        struct task_struct *me = current;
 473
 474        childregs = ((struct pt_regs *)
 475                        (THREAD_SIZE + task_stack_page(p))) - 1;
 476        *childregs = *regs;
 477
 478        childregs->ax = 0;
 479        childregs->sp = sp;
 480        if (sp == ~0UL)
 481                childregs->sp = (unsigned long)childregs;
 482
 483        p->thread.sp = (unsigned long) childregs;
 484        p->thread.sp0 = (unsigned long) (childregs+1);
 485        p->thread.usersp = me->thread.usersp;
 486
 487        set_tsk_thread_flag(p, TIF_FORK);
 488
 489        p->thread.fs = me->thread.fs;
 490        p->thread.gs = me->thread.gs;
 491
 492        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
 493        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
 494        asm("mov %%es,%0" : "=m" (p->thread.es));
 495        asm("mov %%ds,%0" : "=m" (p->thread.ds));
 496
 497        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 498                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 499                if (!p->thread.io_bitmap_ptr) {
 500                        p->thread.io_bitmap_max = 0;
 501                        return -ENOMEM;
 502                }
 503                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 504                                IO_BITMAP_BYTES);
 505                set_tsk_thread_flag(p, TIF_IO_BITMAP);
 506        }
 507
 508        /*
 509         * Set a new TLS for the child thread?
 510         */
 511        if (clone_flags & CLONE_SETTLS) {
 512#ifdef CONFIG_IA32_EMULATION
 513                if (test_thread_flag(TIF_IA32))
 514                        err = do_set_thread_area(p, -1,
 515                                (struct user_desc __user *)childregs->si, 0);
 516                else                    
 517#endif   
 518                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
 519                if (err) 
 520                        goto out;
 521        }
 522        err = 0;
 523out:
 524        if (err && p->thread.io_bitmap_ptr) {
 525                kfree(p->thread.io_bitmap_ptr);
 526                p->thread.io_bitmap_max = 0;
 527        }
 528        return err;
 529}
 530
 531/*
 532 * This special macro can be used to load a debugging register
 533 */
 534#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 535
 536static inline void __switch_to_xtra(struct task_struct *prev_p,
 537                                    struct task_struct *next_p,
 538                                    struct tss_struct *tss)
 539{
 540        struct thread_struct *prev, *next;
 541        unsigned long debugctl;
 542
 543        prev = &prev_p->thread,
 544        next = &next_p->thread;
 545
 546        debugctl = prev->debugctlmsr;
 547        if (next->ds_area_msr != prev->ds_area_msr) {
 548                /* we clear debugctl to make sure DS
 549                 * is not in use when we change it */
 550                debugctl = 0;
 551                wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
 552                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
 553        }
 554
 555        if (next->debugctlmsr != debugctl)
 556                wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
 557
 558        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 559                loaddebug(next, 0);
 560                loaddebug(next, 1);
 561                loaddebug(next, 2);
 562                loaddebug(next, 3);
 563                /* no 4 and 5 */
 564                loaddebug(next, 6);
 565                loaddebug(next, 7);
 566        }
 567
 568        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 569                /*
 570                 * Copy the relevant range of the IO bitmap.
 571                 * Normally this is 128 bytes or less:
 572                 */
 573                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 574                       max(prev->io_bitmap_max, next->io_bitmap_max));
 575        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
 576                /*
 577                 * Clear any possible leftover bits:
 578                 */
 579                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 580        }
 581
 582#ifdef X86_BTS
 583        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 584                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 585
 586        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 587                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
 588#endif
 589}
 590
 591/*
 592 *      switch_to(x,y) should switch tasks from x to y.
 593 *
 594 * This could still be optimized:
 595 * - fold all the options into a flag word and test it with a single test.
 596 * - could test fs/gs bitsliced
 597 *
 598 * Kprobes not supported here. Set the probe on schedule instead.
 599 */
 600struct task_struct *
 601__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 602{
 603        struct thread_struct *prev = &prev_p->thread,
 604                                 *next = &next_p->thread;
 605        int cpu = smp_processor_id();
 606        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 607
 608        /* we're going to use this soon, after a few expensive things */
 609        if (next_p->fpu_counter>5)
 610                prefetch(&next->i387.fxsave);
 611
 612        /*
 613         * Reload esp0, LDT and the page table pointer:
 614         */
 615        load_sp0(tss, next);
 616
 617        /* 
 618         * Switch DS and ES.
 619         * This won't pick up thread selector changes, but I guess that is ok.
 620         */
 621        asm volatile("mov %%es,%0" : "=m" (prev->es));
 622        if (unlikely(next->es | prev->es))
 623                loadsegment(es, next->es); 
 624        
 625        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
 626        if (unlikely(next->ds | prev->ds))
 627                loadsegment(ds, next->ds);
 628
 629        load_TLS(next, cpu);
 630
 631        /* 
 632         * Switch FS and GS.
 633         */
 634        { 
 635                unsigned fsindex;
 636                asm volatile("movl %%fs,%0" : "=r" (fsindex)); 
 637                /* segment register != 0 always requires a reload. 
 638                   also reload when it has changed. 
 639                   when prev process used 64bit base always reload
 640                   to avoid an information leak. */
 641                if (unlikely(fsindex | next->fsindex | prev->fs)) {
 642                        loadsegment(fs, next->fsindex);
 643                        /* check if the user used a selector != 0
 644                         * if yes clear 64bit base, since overloaded base
 645                         * is always mapped to the Null selector
 646                         */
 647                        if (fsindex)
 648                        prev->fs = 0;                           
 649                }
 650                /* when next process has a 64bit base use it */
 651                if (next->fs) 
 652                        wrmsrl(MSR_FS_BASE, next->fs); 
 653                prev->fsindex = fsindex;
 654        }
 655        { 
 656                unsigned gsindex;
 657                asm volatile("movl %%gs,%0" : "=r" (gsindex)); 
 658                if (unlikely(gsindex | next->gsindex | prev->gs)) {
 659                        load_gs_index(next->gsindex);
 660                        if (gsindex)
 661                        prev->gs = 0;                           
 662                }
 663                if (next->gs)
 664                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
 665                prev->gsindex = gsindex;
 666        }
 667
 668        /* Must be after DS reload */
 669        unlazy_fpu(prev_p);
 670
 671        /* 
 672         * Switch the PDA and FPU contexts.
 673         */
 674        prev->usersp = read_pda(oldrsp);
 675        write_pda(oldrsp, next->usersp);
 676        write_pda(pcurrent, next_p); 
 677
 678        write_pda(kernelstack,
 679        (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
 680#ifdef CONFIG_CC_STACKPROTECTOR
 681        write_pda(stack_canary, next_p->stack_canary);
 682        /*
 683         * Build time only check to make sure the stack_canary is at
 684         * offset 40 in the pda; this is a gcc ABI requirement
 685         */
 686        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
 687#endif
 688
 689        /*
 690         * Now maybe reload the debug registers and handle I/O bitmaps
 691         */
 692        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
 693                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 694                __switch_to_xtra(prev_p, next_p, tss);
 695
 696        /* If the task has used fpu the last 5 timeslices, just do a full
 697         * restore of the math state immediately to avoid the trap; the
 698         * chances of needing FPU soon are obviously high now
 699         */
 700        if (next_p->fpu_counter>5)
 701                math_state_restore();
 702        return prev_p;
 703}
 704
 705/*
 706 * sys_execve() executes a new program.
 707 */
 708asmlinkage
 709long sys_execve(char __user *name, char __user * __user *argv,
 710                char __user * __user *envp, struct pt_regs *regs)
 711{
 712        long error;
 713        char * filename;
 714
 715        filename = getname(name);
 716        error = PTR_ERR(filename);
 717        if (IS_ERR(filename))
 718                return error;
 719        error = do_execve(filename, argv, envp, regs);
 720        putname(filename);
 721        return error;
 722}
 723
 724void set_personality_64bit(void)
 725{
 726        /* inherit personality from parent */
 727
 728        /* Make sure to be in 64bit mode */
 729        clear_thread_flag(TIF_IA32);
 730
 731        /* TBD: overwrites user setup. Should have two bits.
 732           But 64bit processes have always behaved this way,
 733           so it's not too bad. The main problem is just that
 734           32bit childs are affected again. */
 735        current->personality &= ~READ_IMPLIES_EXEC;
 736}
 737
 738asmlinkage long sys_fork(struct pt_regs *regs)
 739{
 740        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 741}
 742
 743asmlinkage long
 744sys_clone(unsigned long clone_flags, unsigned long newsp,
 745          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 746{
 747        if (!newsp)
 748                newsp = regs->sp;
 749        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 750}
 751
 752/*
 753 * This is trivial, and on the face of it looks like it
 754 * could equally well be done in user mode.
 755 *
 756 * Not so, for quite unobvious reasons - register pressure.
 757 * In user mode vfork() cannot have a stack frame, and if
 758 * done by calling the "clone()" system call directly, you
 759 * do not have enough call-clobbered registers to hold all
 760 * the information you need.
 761 */
 762asmlinkage long sys_vfork(struct pt_regs *regs)
 763{
 764        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 765                    NULL, NULL);
 766}
 767
 768unsigned long get_wchan(struct task_struct *p)
 769{
 770        unsigned long stack;
 771        u64 fp,ip;
 772        int count = 0;
 773
 774        if (!p || p == current || p->state==TASK_RUNNING)
 775                return 0; 
 776        stack = (unsigned long)task_stack_page(p);
 777        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 778                return 0;
 779        fp = *(u64 *)(p->thread.sp);
 780        do { 
 781                if (fp < (unsigned long)stack ||
 782                    fp > (unsigned long)stack+THREAD_SIZE)
 783                        return 0; 
 784                ip = *(u64 *)(fp+8);
 785                if (!in_sched_functions(ip))
 786                        return ip;
 787                fp = *(u64 *)fp; 
 788        } while (count++ < 16); 
 789        return 0;
 790}
 791
 792long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 793{ 
 794        int ret = 0; 
 795        int doit = task == current;
 796        int cpu;
 797
 798        switch (code) { 
 799        case ARCH_SET_GS:
 800                if (addr >= TASK_SIZE_OF(task))
 801                        return -EPERM; 
 802                cpu = get_cpu();
 803                /* handle small bases via the GDT because that's faster to 
 804                   switch. */
 805                if (addr <= 0xffffffff) {  
 806                        set_32bit_tls(task, GS_TLS, addr); 
 807                        if (doit) { 
 808                                load_TLS(&task->thread, cpu);
 809                                load_gs_index(GS_TLS_SEL); 
 810                        }
 811                        task->thread.gsindex = GS_TLS_SEL; 
 812                        task->thread.gs = 0;
 813                } else { 
 814                        task->thread.gsindex = 0;
 815                        task->thread.gs = addr;
 816                        if (doit) {
 817                                load_gs_index(0);
 818                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 819                        } 
 820                }
 821                put_cpu();
 822                break;
 823        case ARCH_SET_FS:
 824                /* Not strictly needed for fs, but do it for symmetry
 825                   with gs */
 826                if (addr >= TASK_SIZE_OF(task))
 827                        return -EPERM;
 828                cpu = get_cpu();
 829                /* handle small bases via the GDT because that's faster to
 830                   switch. */
 831                if (addr <= 0xffffffff) {
 832                        set_32bit_tls(task, FS_TLS, addr);
 833                        if (doit) {
 834                                load_TLS(&task->thread, cpu);
 835                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 836                        }
 837                        task->thread.fsindex = FS_TLS_SEL;
 838                        task->thread.fs = 0;
 839                } else {
 840                        task->thread.fsindex = 0;
 841                        task->thread.fs = addr;
 842                        if (doit) {
 843                                /* set the selector to 0 to not confuse
 844                                   __switch_to */
 845                                asm volatile("movl %0,%%fs" :: "r" (0));
 846                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
 847                        }
 848                }
 849                put_cpu();
 850                break;
 851        case ARCH_GET_FS: {
 852                unsigned long base;
 853                if (task->thread.fsindex == FS_TLS_SEL)
 854                        base = read_32bit_tls(task, FS_TLS);
 855                else if (doit)
 856                        rdmsrl(MSR_FS_BASE, base);
 857                else
 858                        base = task->thread.fs;
 859                ret = put_user(base, (unsigned long __user *)addr);
 860                break;
 861        }
 862        case ARCH_GET_GS: {
 863                unsigned long base;
 864                unsigned gsindex;
 865                if (task->thread.gsindex == GS_TLS_SEL)
 866                        base = read_32bit_tls(task, GS_TLS);
 867                else if (doit) {
 868                        asm("movl %%gs,%0" : "=r" (gsindex));
 869                        if (gsindex)
 870                                rdmsrl(MSR_KERNEL_GS_BASE, base);
 871                        else
 872                                base = task->thread.gs;
 873                }
 874                else
 875                        base = task->thread.gs;
 876                ret = put_user(base, (unsigned long __user *)addr);
 877                break;
 878        }
 879
 880        default:
 881                ret = -EINVAL;
 882                break;
 883        }
 884
 885        return ret;
 886}
 887
 888long sys_arch_prctl(int code, unsigned long addr)
 889{
 890        return do_arch_prctl(current, code, addr);
 891}
 892
 893unsigned long arch_align_stack(unsigned long sp)
 894{
 895        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 896                sp -= get_random_int() % 8192;
 897        return sp & ~0xf;
 898}
 899
 900unsigned long arch_randomize_brk(struct mm_struct *mm)
 901{
 902        unsigned long range_end = mm->brk + 0x02000000;
 903        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 904}
 905
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.