linux/arch/x86/kernel/process_64.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1995  Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *      Gareth Hughes <gareth@valinux.com>, May 2000
   6 *
   7 *  X86-64 port
   8 *      Andi Kleen.
   9 *
  10 *      CPU hotplug support - ashok.raj@intel.com
  11 */
  12
/*
 * This file handles the architecture-dependent parts of process handling.
 */
  16
  17#include <stdarg.h>
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/fs.h>
  23#include <linux/kernel.h>
  24#include <linux/mm.h>
  25#include <linux/elfcore.h>
  26#include <linux/smp.h>
  27#include <linux/slab.h>
  28#include <linux/user.h>
  29#include <linux/interrupt.h>
  30#include <linux/utsname.h>
  31#include <linux/delay.h>
  32#include <linux/module.h>
  33#include <linux/ptrace.h>
  34#include <linux/random.h>
  35#include <linux/notifier.h>
  36#include <linux/kprobes.h>
  37#include <linux/kdebug.h>
  38#include <linux/tick.h>
  39#include <linux/prctl.h>
  40#include <linux/uaccess.h>
  41#include <linux/io.h>
  42
  43#include <asm/pgtable.h>
  44#include <asm/system.h>
  45#include <asm/processor.h>
  46#include <asm/i387.h>
  47#include <asm/mmu_context.h>
  48#include <asm/pda.h>
  49#include <asm/prctl.h>
  50#include <asm/desc.h>
  51#include <asm/proto.h>
  52#include <asm/ia32.h>
  53#include <asm/idle.h>
  54#include <asm/syscalls.h>
  55
  56asmlinkage extern void ret_from_fork(void);
  57
  58unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
  59
  60static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  61
  62void idle_notifier_register(struct notifier_block *n)
  63{
  64        atomic_notifier_chain_register(&idle_notifier, n);
  65}
  66EXPORT_SYMBOL_GPL(idle_notifier_register);
  67
  68void idle_notifier_unregister(struct notifier_block *n)
  69{
  70        atomic_notifier_chain_unregister(&idle_notifier, n);
  71}
  72EXPORT_SYMBOL_GPL(idle_notifier_unregister);
  73
  74void enter_idle(void)
  75{
  76        write_pda(isidle, 1);
  77        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
  78}
  79
  80static void __exit_idle(void)
  81{
  82        if (test_and_clear_bit_pda(0, isidle) == 0)
  83                return;
  84        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
  85}
  86
  87/* Called from interrupts to signify idle end */
  88void exit_idle(void)
  89{
  90        /* idle loop has pid 0 */
  91        if (current->pid)
  92                return;
  93        __exit_idle();
  94}
  95
  96#ifndef CONFIG_SMP
  97static inline void play_dead(void)
  98{
  99        BUG();
 100}
 101#endif
 102
 103/*
 104 * The idle thread. There's no useful work to be
 105 * done, so just try to conserve power and have a
 106 * low exit latency (ie sit in a loop waiting for
 107 * somebody to say that they'd like to reschedule)
 108 */
 109void cpu_idle(void)
 110{
 111        current_thread_info()->status |= TS_POLLING;
 112        /* endless idle loop with no priority at all */
 113        while (1) {
 114                tick_nohz_stop_sched_tick(1);
 115                while (!need_resched()) {
 116
 117                        rmb();
 118
 119                        if (cpu_is_offline(smp_processor_id()))
 120                                play_dead();
 121                        /*
 122                         * Idle routines should keep interrupts disabled
 123                         * from here on, until they go to idle.
 124                         * Otherwise, idle callbacks can misfire.
 125                         */
 126                        local_irq_disable();
 127                        enter_idle();
 128                        /* Don't trace irqs off for idle */
 129                        stop_critical_timings();
 130                        pm_idle();
 131                        start_critical_timings();
 132                        /* In many cases the interrupt that ended idle
 133                           has already called exit_idle. But some idle
 134                           loops can be woken up without interrupt. */
 135                        __exit_idle();
 136                }
 137
 138                tick_nohz_restart_sched_tick();
 139                preempt_enable_no_resched();
 140                schedule();
 141                preempt_disable();
 142        }
 143}
 144
 145/* Prints also some state that isn't saved in the pt_regs */
 146void __show_regs(struct pt_regs *regs, int all)
 147{
 148        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 149        unsigned long d0, d1, d2, d3, d6, d7;
 150        unsigned int fsindex, gsindex;
 151        unsigned int ds, cs, es;
 152
 153        printk("\n");
 154        print_modules();
 155        printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
 156                current->pid, current->comm, print_tainted(),
 157                init_utsname()->release,
 158                (int)strcspn(init_utsname()->version, " "),
 159                init_utsname()->version);
 160        printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 161        printk_address(regs->ip, 1);
 162        printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
 163                        regs->sp, regs->flags);
 164        printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 165               regs->ax, regs->bx, regs->cx);
 166        printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 167               regs->dx, regs->si, regs->di);
 168        printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 169               regs->bp, regs->r8, regs->r9);
 170        printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
 171               regs->r10, regs->r11, regs->r12);
 172        printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
 173               regs->r13, regs->r14, regs->r15);
 174
 175        asm("movl %%ds,%0" : "=r" (ds));
 176        asm("movl %%cs,%0" : "=r" (cs));
 177        asm("movl %%es,%0" : "=r" (es));
 178        asm("movl %%fs,%0" : "=r" (fsindex));
 179        asm("movl %%gs,%0" : "=r" (gsindex));
 180
 181        rdmsrl(MSR_FS_BASE, fs);
 182        rdmsrl(MSR_GS_BASE, gs);
 183        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 184
 185        if (!all)
 186                return;
 187
 188        cr0 = read_cr0();
 189        cr2 = read_cr2();
 190        cr3 = read_cr3();
 191        cr4 = read_cr4();
 192
 193        printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
 194               fs, fsindex, gs, gsindex, shadowgs);
 195        printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
 196                        es, cr0);
 197        printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
 198                        cr4);
 199
 200        get_debugreg(d0, 0);
 201        get_debugreg(d1, 1);
 202        get_debugreg(d2, 2);
 203        printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 204        get_debugreg(d3, 3);
 205        get_debugreg(d6, 6);
 206        get_debugreg(d7, 7);
 207        printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 208}
 209
 210void show_regs(struct pt_regs *regs)
 211{
 212        printk(KERN_INFO "CPU %d:", smp_processor_id());
 213        __show_regs(regs, 1);
 214        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 215}
 216
 217/*
 218 * Free current thread data structures etc..
 219 */
 220void exit_thread(void)
 221{
 222        struct task_struct *me = current;
 223        struct thread_struct *t = &me->thread;
 224
 225        if (me->thread.io_bitmap_ptr) {
 226                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 227
 228                kfree(t->io_bitmap_ptr);
 229                t->io_bitmap_ptr = NULL;
 230                clear_thread_flag(TIF_IO_BITMAP);
 231                /*
 232                 * Careful, clear this in the TSS too:
 233                 */
 234                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
 235                t->io_bitmap_max = 0;
 236                put_cpu();
 237        }
 238#ifdef CONFIG_X86_DS
 239        /* Free any DS contexts that have not been properly released. */
 240        if (unlikely(t->ds_ctx)) {
 241                /* we clear debugctl to make sure DS is not used. */
 242                update_debugctlmsr(0);
 243                ds_free(t->ds_ctx);
 244        }
 245#endif /* CONFIG_X86_DS */
 246}
 247
 248void flush_thread(void)
 249{
 250        struct task_struct *tsk = current;
 251
 252        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
 253                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
 254                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
 255                        clear_tsk_thread_flag(tsk, TIF_IA32);
 256                } else {
 257                        set_tsk_thread_flag(tsk, TIF_IA32);
 258                        current_thread_info()->status |= TS_COMPAT;
 259                }
 260        }
 261        clear_tsk_thread_flag(tsk, TIF_DEBUG);
 262
 263        tsk->thread.debugreg0 = 0;
 264        tsk->thread.debugreg1 = 0;
 265        tsk->thread.debugreg2 = 0;
 266        tsk->thread.debugreg3 = 0;
 267        tsk->thread.debugreg6 = 0;
 268        tsk->thread.debugreg7 = 0;
 269        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 270        /*
 271         * Forget coprocessor state..
 272         */
 273        tsk->fpu_counter = 0;
 274        clear_fpu(tsk);
 275        clear_used_math();
 276}
 277
 278void release_thread(struct task_struct *dead_task)
 279{
 280        if (dead_task->mm) {
 281                if (dead_task->mm->context.size) {
 282                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
 283                                        dead_task->comm,
 284                                        dead_task->mm->context.ldt,
 285                                        dead_task->mm->context.size);
 286                        BUG();
 287                }
 288        }
 289}
 290
 291static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 292{
 293        struct user_desc ud = {
 294                .base_addr = addr,
 295                .limit = 0xfffff,
 296                .seg_32bit = 1,
 297                .limit_in_pages = 1,
 298                .useable = 1,
 299        };
 300        struct desc_struct *desc = t->thread.tls_array;
 301        desc += tls;
 302        fill_ldt(desc, &ud);
 303}
 304
 305static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 306{
 307        return get_desc_base(&t->thread.tls_array[tls]);
 308}
 309
 310/*
 311 * This gets called before we allocate a new thread and copy
 312 * the current task into it.
 313 */
 314void prepare_to_copy(struct task_struct *tsk)
 315{
 316        unlazy_fpu(tsk);
 317}
 318
 319int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 320                unsigned long unused,
 321        struct task_struct *p, struct pt_regs *regs)
 322{
 323        int err;
 324        struct pt_regs *childregs;
 325        struct task_struct *me = current;
 326
 327        childregs = ((struct pt_regs *)
 328                        (THREAD_SIZE + task_stack_page(p))) - 1;
 329        *childregs = *regs;
 330
 331        childregs->ax = 0;
 332        childregs->sp = sp;
 333        if (sp == ~0UL)
 334                childregs->sp = (unsigned long)childregs;
 335
 336        p->thread.sp = (unsigned long) childregs;
 337        p->thread.sp0 = (unsigned long) (childregs+1);
 338        p->thread.usersp = me->thread.usersp;
 339
 340        set_tsk_thread_flag(p, TIF_FORK);
 341
 342        p->thread.fs = me->thread.fs;
 343        p->thread.gs = me->thread.gs;
 344
 345        savesegment(gs, p->thread.gsindex);
 346        savesegment(fs, p->thread.fsindex);
 347        savesegment(es, p->thread.es);
 348        savesegment(ds, p->thread.ds);
 349
 350        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 351                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 352                if (!p->thread.io_bitmap_ptr) {
 353                        p->thread.io_bitmap_max = 0;
 354                        return -ENOMEM;
 355                }
 356                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 357                                IO_BITMAP_BYTES);
 358                set_tsk_thread_flag(p, TIF_IO_BITMAP);
 359        }
 360
 361        /*
 362         * Set a new TLS for the child thread?
 363         */
 364        if (clone_flags & CLONE_SETTLS) {
 365#ifdef CONFIG_IA32_EMULATION
 366                if (test_thread_flag(TIF_IA32))
 367                        err = do_set_thread_area(p, -1,
 368                                (struct user_desc __user *)childregs->si, 0);
 369                else
 370#endif
 371                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
 372                if (err)
 373                        goto out;
 374        }
 375        err = 0;
 376out:
 377        if (err && p->thread.io_bitmap_ptr) {
 378                kfree(p->thread.io_bitmap_ptr);
 379                p->thread.io_bitmap_max = 0;
 380        }
 381        return err;
 382}
 383
 384void
 385start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 386{
 387        loadsegment(fs, 0);
 388        loadsegment(es, 0);
 389        loadsegment(ds, 0);
 390        load_gs_index(0);
 391        regs->ip                = new_ip;
 392        regs->sp                = new_sp;
 393        write_pda(oldrsp, new_sp);
 394        regs->cs                = __USER_CS;
 395        regs->ss                = __USER_DS;
 396        regs->flags             = 0x200;
 397        set_fs(USER_DS);
 398        /*
 399         * Free the old FP and other extended state
 400         */
 401        free_thread_xstate(current);
 402}
 403EXPORT_SYMBOL_GPL(start_thread);
 404
 405static void hard_disable_TSC(void)
 406{
 407        write_cr4(read_cr4() | X86_CR4_TSD);
 408}
 409
 410void disable_TSC(void)
 411{
 412        preempt_disable();
 413        if (!test_and_set_thread_flag(TIF_NOTSC))
 414                /*
 415                 * Must flip the CPU state synchronously with
 416                 * TIF_NOTSC in the current running context.
 417                 */
 418                hard_disable_TSC();
 419        preempt_enable();
 420}
 421
 422static void hard_enable_TSC(void)
 423{
 424        write_cr4(read_cr4() & ~X86_CR4_TSD);
 425}
 426
 427static void enable_TSC(void)
 428{
 429        preempt_disable();
 430        if (test_and_clear_thread_flag(TIF_NOTSC))
 431                /*
 432                 * Must flip the CPU state synchronously with
 433                 * TIF_NOTSC in the current running context.
 434                 */
 435                hard_enable_TSC();
 436        preempt_enable();
 437}
 438
 439int get_tsc_mode(unsigned long adr)
 440{
 441        unsigned int val;
 442
 443        if (test_thread_flag(TIF_NOTSC))
 444                val = PR_TSC_SIGSEGV;
 445        else
 446                val = PR_TSC_ENABLE;
 447
 448        return put_user(val, (unsigned int __user *)adr);
 449}
 450
 451int set_tsc_mode(unsigned int val)
 452{
 453        if (val == PR_TSC_SIGSEGV)
 454                disable_TSC();
 455        else if (val == PR_TSC_ENABLE)
 456                enable_TSC();
 457        else
 458                return -EINVAL;
 459
 460        return 0;
 461}
 462
 463/*
 464 * This special macro can be used to load a debugging register
 465 */
 466#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 467
 468static inline void __switch_to_xtra(struct task_struct *prev_p,
 469                                    struct task_struct *next_p,
 470                                    struct tss_struct *tss)
 471{
 472        struct thread_struct *prev, *next;
 473        unsigned long debugctl;
 474
 475        prev = &prev_p->thread,
 476        next = &next_p->thread;
 477
 478        debugctl = prev->debugctlmsr;
 479
 480#ifdef CONFIG_X86_DS
 481        {
 482                unsigned long ds_prev = 0, ds_next = 0;
 483
 484                if (prev->ds_ctx)
 485                        ds_prev = (unsigned long)prev->ds_ctx->ds;
 486                if (next->ds_ctx)
 487                        ds_next = (unsigned long)next->ds_ctx->ds;
 488
 489                if (ds_next != ds_prev) {
 490                        /*
 491                         * We clear debugctl to make sure DS
 492                         * is not in use when we change it:
 493                         */
 494                        debugctl = 0;
 495                        update_debugctlmsr(0);
 496                        wrmsrl(MSR_IA32_DS_AREA, ds_next);
 497                }
 498        }
 499#endif /* CONFIG_X86_DS */
 500
 501        if (next->debugctlmsr != debugctl)
 502                update_debugctlmsr(next->debugctlmsr);
 503
 504        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 505                loaddebug(next, 0);
 506                loaddebug(next, 1);
 507                loaddebug(next, 2);
 508                loaddebug(next, 3);
 509                /* no 4 and 5 */
 510                loaddebug(next, 6);
 511                loaddebug(next, 7);
 512        }
 513
 514        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 515            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 516                /* prev and next are different */
 517                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
 518                        hard_disable_TSC();
 519                else
 520                        hard_enable_TSC();
 521        }
 522
 523        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 524                /*
 525                 * Copy the relevant range of the IO bitmap.
 526                 * Normally this is 128 bytes or less:
 527                 */
 528                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 529                       max(prev->io_bitmap_max, next->io_bitmap_max));
 530        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
 531                /*
 532                 * Clear any possible leftover bits:
 533                 */
 534                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 535        }
 536
 537#ifdef CONFIG_X86_PTRACE_BTS
 538        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 539                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 540
 541        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 542                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
 543#endif /* CONFIG_X86_PTRACE_BTS */
 544}
 545
 546/*
 547 *      switch_to(x,y) should switch tasks from x to y.
 548 *
 549 * This could still be optimized:
 550 * - fold all the options into a flag word and test it with a single test.
 551 * - could test fs/gs bitsliced
 552 *
 553 * Kprobes not supported here. Set the probe on schedule instead.
 554 */
 555struct task_struct *
 556__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 557{
 558        struct thread_struct *prev = &prev_p->thread;
 559        struct thread_struct *next = &next_p->thread;
 560        int cpu = smp_processor_id();
 561        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 562        unsigned fsindex, gsindex;
 563
 564        /* we're going to use this soon, after a few expensive things */
 565        if (next_p->fpu_counter > 5)
 566                prefetch(next->xstate);
 567
 568        /*
 569         * Reload esp0, LDT and the page table pointer:
 570         */
 571        load_sp0(tss, next);
 572
 573        /*
 574         * Switch DS and ES.
 575         * This won't pick up thread selector changes, but I guess that is ok.
 576         */
 577        savesegment(es, prev->es);
 578        if (unlikely(next->es | prev->es))
 579                loadsegment(es, next->es);
 580
 581        savesegment(ds, prev->ds);
 582        if (unlikely(next->ds | prev->ds))
 583                loadsegment(ds, next->ds);
 584
 585
 586        /* We must save %fs and %gs before load_TLS() because
 587         * %fs and %gs may be cleared by load_TLS().
 588         *
 589         * (e.g. xen_load_tls())
 590         */
 591        savesegment(fs, fsindex);
 592        savesegment(gs, gsindex);
 593
 594        load_TLS(next, cpu);
 595
 596        /*
 597         * Leave lazy mode, flushing any hypercalls made here.
 598         * This must be done before restoring TLS segments so
 599         * the GDT and LDT are properly updated, and must be
 600         * done before math_state_restore, so the TS bit is up
 601         * to date.
 602         */
 603        arch_leave_lazy_cpu_mode();
 604
 605        /*
 606         * Switch FS and GS.
 607         *
 608         * Segment register != 0 always requires a reload.  Also
 609         * reload when it has changed.  When prev process used 64bit
 610         * base always reload to avoid an information leak.
 611         */
 612        if (unlikely(fsindex | next->fsindex | prev->fs)) {
 613                loadsegment(fs, next->fsindex);
 614                /*
 615                 * Check if the user used a selector != 0; if yes
 616                 *  clear 64bit base, since overloaded base is always
 617                 *  mapped to the Null selector
 618                 */
 619                if (fsindex)
 620                        prev->fs = 0;
 621        }
 622        /* when next process has a 64bit base use it */
 623        if (next->fs)
 624                wrmsrl(MSR_FS_BASE, next->fs);
 625        prev->fsindex = fsindex;
 626
 627        if (unlikely(gsindex | next->gsindex | prev->gs)) {
 628                load_gs_index(next->gsindex);
 629                if (gsindex)
 630                        prev->gs = 0;
 631        }
 632        if (next->gs)
 633                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 634        prev->gsindex = gsindex;
 635
 636        /* Must be after DS reload */
 637        unlazy_fpu(prev_p);
 638
 639        /*
 640         * Switch the PDA and FPU contexts.
 641         */
 642        prev->usersp = read_pda(oldrsp);
 643        write_pda(oldrsp, next->usersp);
 644        write_pda(pcurrent, next_p);
 645
 646        write_pda(kernelstack,
 647                  (unsigned long)task_stack_page(next_p) +
 648                  THREAD_SIZE - PDA_STACKOFFSET);
 649#ifdef CONFIG_CC_STACKPROTECTOR
 650        write_pda(stack_canary, next_p->stack_canary);
 651        /*
 652         * Build time only check to make sure the stack_canary is at
 653         * offset 40 in the pda; this is a gcc ABI requirement
 654         */
 655        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
 656#endif
 657
 658        /*
 659         * Now maybe reload the debug registers and handle I/O bitmaps
 660         */
 661        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
 662                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 663                __switch_to_xtra(prev_p, next_p, tss);
 664
 665        /* If the task has used fpu the last 5 timeslices, just do a full
 666         * restore of the math state immediately to avoid the trap; the
 667         * chances of needing FPU soon are obviously high now
 668         *
 669         * tsk_used_math() checks prevent calling math_state_restore(),
 670         * which can sleep in the case of !tsk_used_math()
 671         */
 672        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
 673                math_state_restore();
 674        return prev_p;
 675}
 676
 677/*
 678 * sys_execve() executes a new program.
 679 */
 680asmlinkage
 681long sys_execve(char __user *name, char __user * __user *argv,
 682                char __user * __user *envp, struct pt_regs *regs)
 683{
 684        long error;
 685        char *filename;
 686
 687        filename = getname(name);
 688        error = PTR_ERR(filename);
 689        if (IS_ERR(filename))
 690                return error;
 691        error = do_execve(filename, argv, envp, regs);
 692        putname(filename);
 693        return error;
 694}
 695
 696void set_personality_64bit(void)
 697{
 698        /* inherit personality from parent */
 699
 700        /* Make sure to be in 64bit mode */
 701        clear_thread_flag(TIF_IA32);
 702
 703        /* TBD: overwrites user setup. Should have two bits.
 704           But 64bit processes have always behaved this way,
 705           so it's not too bad. The main problem is just that
 706           32bit childs are affected again. */
 707        current->personality &= ~READ_IMPLIES_EXEC;
 708}
 709
 710asmlinkage long sys_fork(struct pt_regs *regs)
 711{
 712        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 713}
 714
 715asmlinkage long
 716sys_clone(unsigned long clone_flags, unsigned long newsp,
 717          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 718{
 719        if (!newsp)
 720                newsp = regs->sp;
 721        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 722}
 723
 724/*
 725 * This is trivial, and on the face of it looks like it
 726 * could equally well be done in user mode.
 727 *
 728 * Not so, for quite unobvious reasons - register pressure.
 729 * In user mode vfork() cannot have a stack frame, and if
 730 * done by calling the "clone()" system call directly, you
 731 * do not have enough call-clobbered registers to hold all
 732 * the information you need.
 733 */
 734asmlinkage long sys_vfork(struct pt_regs *regs)
 735{
 736        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 737                    NULL, NULL);
 738}
 739
 740unsigned long get_wchan(struct task_struct *p)
 741{
 742        unsigned long stack;
 743        u64 fp, ip;
 744        int count = 0;
 745
 746        if (!p || p == current || p->state == TASK_RUNNING)
 747                return 0;
 748        stack = (unsigned long)task_stack_page(p);
 749        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
 750                return 0;
 751        fp = *(u64 *)(p->thread.sp);
 752        do {
 753                if (fp < (unsigned long)stack ||
 754                    fp >= (unsigned long)stack+THREAD_SIZE)
 755                        return 0;
 756                ip = *(u64 *)(fp+8);
 757                if (!in_sched_functions(ip))
 758                        return ip;
 759                fp = *(u64 *)fp;
 760        } while (count++ < 16);
 761        return 0;
 762}
 763
 764long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 765{
 766        int ret = 0;
 767        int doit = task == current;
 768        int cpu;
 769
 770        switch (code) {
 771        case ARCH_SET_GS:
 772                if (addr >= TASK_SIZE_OF(task))
 773                        return -EPERM;
 774                cpu = get_cpu();
 775                /* handle small bases via the GDT because that's faster to
 776                   switch. */
 777                if (addr <= 0xffffffff) {
 778                        set_32bit_tls(task, GS_TLS, addr);
 779                        if (doit) {
 780                                load_TLS(&task->thread, cpu);
 781                                load_gs_index(GS_TLS_SEL);
 782                        }
 783                        task->thread.gsindex = GS_TLS_SEL;
 784                        task->thread.gs = 0;
 785                } else {
 786                        task->thread.gsindex = 0;
 787                        task->thread.gs = addr;
 788                        if (doit) {
 789                                load_gs_index(0);
 790                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 791                        }
 792                }
 793                put_cpu();
 794                break;
 795        case ARCH_SET_FS:
 796                /* Not strictly needed for fs, but do it for symmetry
 797                   with gs */
 798                if (addr >= TASK_SIZE_OF(task))
 799                        return -EPERM;
 800                cpu = get_cpu();
 801                /* handle small bases via the GDT because that's faster to
 802                   switch. */
 803                if (addr <= 0xffffffff) {
 804                        set_32bit_tls(task, FS_TLS, addr);
 805                        if (doit) {
 806                                load_TLS(&task->thread, cpu);
 807                                loadsegment(fs, FS_TLS_SEL);
 808                        }
 809                        task->thread.fsindex = FS_TLS_SEL;
 810                        task->thread.fs = 0;
 811                } else {
 812                        task->thread.fsindex = 0;
 813                        task->thread.fs = addr;
 814                        if (doit) {
 815                                /* set the selector to 0 to not confuse
 816                                   __switch_to */
 817                                loadsegment(fs, 0);
 818                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
 819                        }
 820                }
 821                put_cpu();
 822                break;
 823        case ARCH_GET_FS: {
 824                unsigned long base;
 825                if (task->thread.fsindex == FS_TLS_SEL)
 826                        base = read_32bit_tls(task, FS_TLS);
 827                else if (doit)
 828                        rdmsrl(MSR_FS_BASE, base);
 829                else
 830                        base = task->thread.fs;
 831                ret = put_user(base, (unsigned long __user *)addr);
 832                break;
 833        }
 834        case ARCH_GET_GS: {
 835                unsigned long base;
 836                unsigned gsindex;
 837                if (task->thread.gsindex == GS_TLS_SEL)
 838                        base = read_32bit_tls(task, GS_TLS);
 839                else if (doit) {
 840                        savesegment(gs, gsindex);
 841                        if (gsindex)
 842                                rdmsrl(MSR_KERNEL_GS_BASE, base);
 843                        else
 844                                base = task->thread.gs;
 845                } else
 846                        base = task->thread.gs;
 847                ret = put_user(base, (unsigned long __user *)addr);
 848                break;
 849        }
 850
 851        default:
 852                ret = -EINVAL;
 853                break;
 854        }
 855
 856        return ret;
 857}
 858
 859long sys_arch_prctl(int code, unsigned long addr)
 860{
 861        return do_arch_prctl(current, code, addr);
 862}
 863
 864unsigned long arch_align_stack(unsigned long sp)
 865{
 866        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 867                sp -= get_random_int() % 8192;
 868        return sp & ~0xf;
 869}
 870
 871unsigned long arch_randomize_brk(struct mm_struct *mm)
 872{
 873        unsigned long range_end = mm->brk + 0x02000000;
 874        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 875}
 876
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.