linux/arch/x86/kernel/process_64.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1995  Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *      Gareth Hughes <gareth@valinux.com>, May 2000
   6 *
   7 *  X86-64 port
   8 *      Andi Kleen.
   9 *
  10 *      CPU hotplug support - ashok.raj@intel.com
  11 */
  12
  13/*
  14 * This file handles the architecture-dependent parts of process handling.
  15 */
  16
  17#include <stdarg.h>
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/fs.h>
  23#include <linux/kernel.h>
  24#include <linux/mm.h>
  25#include <linux/elfcore.h>
  26#include <linux/smp.h>
  27#include <linux/slab.h>
  28#include <linux/user.h>
  29#include <linux/interrupt.h>
  30#include <linux/utsname.h>
  31#include <linux/delay.h>
  32#include <linux/module.h>
  33#include <linux/ptrace.h>
  34#include <linux/random.h>
  35#include <linux/notifier.h>
  36#include <linux/kprobes.h>
  37#include <linux/kdebug.h>
  38#include <linux/tick.h>
  39#include <linux/prctl.h>
  40#include <linux/uaccess.h>
  41#include <linux/io.h>
  42#include <linux/ftrace.h>
  43#include <linux/dmi.h>
  44
  45#include <asm/pgtable.h>
  46#include <asm/system.h>
  47#include <asm/processor.h>
  48#include <asm/i387.h>
  49#include <asm/mmu_context.h>
  50#include <asm/pda.h>
  51#include <asm/prctl.h>
  52#include <asm/desc.h>
  53#include <asm/proto.h>
  54#include <asm/ia32.h>
  55#include <asm/idle.h>
  56#include <asm/syscalls.h>
  57#include <asm/ds.h>
  58
  59asmlinkage extern void ret_from_fork(void);	/* defined outside this file; not referenced in the code shown here */
  60
/* Clone flag set kept in a variable; presumably consumed by kernel_thread() elsewhere - TODO confirm. */
  61unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
  62
/* Notifier chain fired with IDLE_START / IDLE_END by enter_idle() and __exit_idle() below. */
  63static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  64
  65void idle_notifier_register(struct notifier_block *n)
  66{
  67        atomic_notifier_chain_register(&idle_notifier, n);
  68}
  69EXPORT_SYMBOL_GPL(idle_notifier_register);
  70
  71void idle_notifier_unregister(struct notifier_block *n)
  72{
  73        atomic_notifier_chain_unregister(&idle_notifier, n);
  74}
  75EXPORT_SYMBOL_GPL(idle_notifier_unregister);
  76
  77void enter_idle(void)
  78{
  79        write_pda(isidle, 1);
  80        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
  81}
  82
  83static void __exit_idle(void)
  84{
  85        if (test_and_clear_bit_pda(0, isidle) == 0)
  86                return;
  87        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
  88}
  89
  90/* Called from interrupts to signify idle end */
  91void exit_idle(void)
  92{
  93        /* idle loop has pid 0 */
  94        if (current->pid)
  95                return;
  96        __exit_idle();
  97}
  98
  99#ifndef CONFIG_SMP
 100static inline void play_dead(void)
 101{
 102        BUG();
 103}
 104#endif
 105
 106/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule).
 *
 * Never returns: the outer loop alternates between idling (inner loop,
 * until need_resched() becomes true) and calling schedule().
 */
 112void cpu_idle(void)
 113{
 114        current_thread_info()->status |= TS_POLLING;
 115        /* endless idle loop with no priority at all */
 116        while (1) {
 117                tick_nohz_stop_sched_tick(1);
 118                while (!need_resched()) {
 119
 120                        rmb();
 121
                        /* an offline CPU leaves the idle loop through play_dead() */
 122                        if (cpu_is_offline(smp_processor_id()))
 123                                play_dead();
 124                        /*
 125                         * Idle routines should keep interrupts disabled
 126                         * from here on, until they go to idle.
 127                         * Otherwise, idle callbacks can misfire.
 128                         */
 129                        local_irq_disable();
 130                        enter_idle();
 131                        /* Don't trace irqs off for idle */
 132                        stop_critical_timings();
 133                        pm_idle();
 134                        start_critical_timings();
 135                        /* In many cases the interrupt that ended idle
 136                           has already called exit_idle. But some idle
 137                           loops can be woken up without interrupt. */
 138                        __exit_idle();
 139                }
 140
                /* need_resched() is set: restart the tick and hand over to the scheduler */
 141                tick_nohz_restart_sched_tick();
 142                preempt_enable_no_resched();
 143                schedule();
 144                preempt_disable();
 145        }
 146}
 147
 148/* Prints also some state that isn't saved in the pt_regs */
 149void __show_regs(struct pt_regs *regs, int all)
 150{
 151        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 152        unsigned long d0, d1, d2, d3, d6, d7;
 153        unsigned int fsindex, gsindex;
 154        unsigned int ds, cs, es;
 155        const char *board;
 156
 157        printk("\n");
 158        print_modules();
 159        board = dmi_get_system_info(DMI_PRODUCT_NAME);
 160        if (!board)
 161                board = "";
 162        printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
 163                current->pid, current->comm, print_tainted(),
 164                init_utsname()->release,
 165                (int)strcspn(init_utsname()->version, " "),
 166                init_utsname()->version, board);
 167        printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 168        printk_address(regs->ip, 1);
 169        printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
 170                        regs->sp, regs->flags);
 171        printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 172               regs->ax, regs->bx, regs->cx);
 173        printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 174               regs->dx, regs->si, regs->di);
 175        printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 176               regs->bp, regs->r8, regs->r9);
 177        printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
 178               regs->r10, regs->r11, regs->r12);
 179        printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
 180               regs->r13, regs->r14, regs->r15);
 181
 182        asm("movl %%ds,%0" : "=r" (ds));
 183        asm("movl %%cs,%0" : "=r" (cs));
 184        asm("movl %%es,%0" : "=r" (es));
 185        asm("movl %%fs,%0" : "=r" (fsindex));
 186        asm("movl %%gs,%0" : "=r" (gsindex));
 187
 188        rdmsrl(MSR_FS_BASE, fs);
 189        rdmsrl(MSR_GS_BASE, gs);
 190        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 191
 192        if (!all)
 193                return;
 194
 195        cr0 = read_cr0();
 196        cr2 = read_cr2();
 197        cr3 = read_cr3();
 198        cr4 = read_cr4();
 199
 200        printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
 201               fs, fsindex, gs, gsindex, shadowgs);
 202        printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
 203                        es, cr0);
 204        printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
 205                        cr4);
 206
 207        get_debugreg(d0, 0);
 208        get_debugreg(d1, 1);
 209        get_debugreg(d2, 2);
 210        printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 211        get_debugreg(d3, 3);
 212        get_debugreg(d6, 6);
 213        get_debugreg(d7, 7);
 214        printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 215}
 216
 217void show_regs(struct pt_regs *regs)
 218{
 219        printk(KERN_INFO "CPU %d:", smp_processor_id());
 220        __show_regs(regs, 1);
 221        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 222}
 223
 224/*
 225 * Free current thread data structures etc..
 226 */
 227void exit_thread(void)
 228{
 229        struct task_struct *me = current;
 230        struct thread_struct *t = &me->thread;
 231
 232        if (me->thread.io_bitmap_ptr) {
 233                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 234
 235                kfree(t->io_bitmap_ptr);
 236                t->io_bitmap_ptr = NULL;
 237                clear_thread_flag(TIF_IO_BITMAP);
 238                /*
 239                 * Careful, clear this in the TSS too:
 240                 */
 241                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
 242                t->io_bitmap_max = 0;
 243                put_cpu();
 244        }
 245
 246        ds_exit_thread(current);
 247}
 248
 249void flush_thread(void)
 250{
 251        struct task_struct *tsk = current;
 252
 253        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
 254                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
 255                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
 256                        clear_tsk_thread_flag(tsk, TIF_IA32);
 257                } else {
 258                        set_tsk_thread_flag(tsk, TIF_IA32);
 259                        current_thread_info()->status |= TS_COMPAT;
 260                }
 261        }
 262        clear_tsk_thread_flag(tsk, TIF_DEBUG);
 263
 264        tsk->thread.debugreg0 = 0;
 265        tsk->thread.debugreg1 = 0;
 266        tsk->thread.debugreg2 = 0;
 267        tsk->thread.debugreg3 = 0;
 268        tsk->thread.debugreg6 = 0;
 269        tsk->thread.debugreg7 = 0;
 270        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 271        /*
 272         * Forget coprocessor state..
 273         */
 274        tsk->fpu_counter = 0;
 275        clear_fpu(tsk);
 276        clear_used_math();
 277}
 278
 279void release_thread(struct task_struct *dead_task)
 280{
 281        if (dead_task->mm) {
 282                if (dead_task->mm->context.size) {
 283                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
 284                                        dead_task->comm,
 285                                        dead_task->mm->context.ldt,
 286                                        dead_task->mm->context.size);
 287                        BUG();
 288                }
 289        }
 290}
 291
 292static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 293{
 294        struct user_desc ud = {
 295                .base_addr = addr,
 296                .limit = 0xfffff,
 297                .seg_32bit = 1,
 298                .limit_in_pages = 1,
 299                .useable = 1,
 300        };
 301        struct desc_struct *desc = t->thread.tls_array;
 302        desc += tls;
 303        fill_ldt(desc, &ud);
 304}
 305
 306static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 307{
 308        return get_desc_base(&t->thread.tls_array[tls]);
 309}
 310
 311/*
 312 * This gets called before we allocate a new thread and copy
 313 * the current task into it.
 314 */
 315void prepare_to_copy(struct task_struct *tsk)
 316{
 317        unlazy_fpu(tsk);
 318}
 319
 320int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 321                unsigned long unused,
 322        struct task_struct *p, struct pt_regs *regs)
 323{
 324        int err;
 325        struct pt_regs *childregs;
 326        struct task_struct *me = current;
 327
 328        childregs = ((struct pt_regs *)
 329                        (THREAD_SIZE + task_stack_page(p))) - 1;
 330        *childregs = *regs;
 331
 332        childregs->ax = 0;
 333        childregs->sp = sp;
 334        if (sp == ~0UL)
 335                childregs->sp = (unsigned long)childregs;
 336
 337        p->thread.sp = (unsigned long) childregs;
 338        p->thread.sp0 = (unsigned long) (childregs+1);
 339        p->thread.usersp = me->thread.usersp;
 340
 341        set_tsk_thread_flag(p, TIF_FORK);
 342
 343        p->thread.fs = me->thread.fs;
 344        p->thread.gs = me->thread.gs;
 345
 346        savesegment(gs, p->thread.gsindex);
 347        savesegment(fs, p->thread.fsindex);
 348        savesegment(es, p->thread.es);
 349        savesegment(ds, p->thread.ds);
 350
 351        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 352                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 353                if (!p->thread.io_bitmap_ptr) {
 354                        p->thread.io_bitmap_max = 0;
 355                        return -ENOMEM;
 356                }
 357                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 358                                IO_BITMAP_BYTES);
 359                set_tsk_thread_flag(p, TIF_IO_BITMAP);
 360        }
 361
 362        /*
 363         * Set a new TLS for the child thread?
 364         */
 365        if (clone_flags & CLONE_SETTLS) {
 366#ifdef CONFIG_IA32_EMULATION
 367                if (test_thread_flag(TIF_IA32))
 368                        err = do_set_thread_area(p, -1,
 369                                (struct user_desc __user *)childregs->si, 0);
 370                else
 371#endif
 372                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
 373                if (err)
 374                        goto out;
 375        }
 376
 377        ds_copy_thread(p, me);
 378
 379        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 380        p->thread.debugctlmsr = 0;
 381
 382        err = 0;
 383out:
 384        if (err && p->thread.io_bitmap_ptr) {
 385                kfree(p->thread.io_bitmap_ptr);
 386                p->thread.io_bitmap_max = 0;
 387        }
 388        return err;
 389}
 390
 391void
 392start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 393{
 394        loadsegment(fs, 0);
 395        loadsegment(es, 0);
 396        loadsegment(ds, 0);
 397        load_gs_index(0);
 398        regs->ip                = new_ip;
 399        regs->sp                = new_sp;
 400        write_pda(oldrsp, new_sp);
 401        regs->cs                = __USER_CS;
 402        regs->ss                = __USER_DS;
 403        regs->flags             = 0x200;
 404        set_fs(USER_DS);
 405        /*
 406         * Free the old FP and other extended state
 407         */
 408        free_thread_xstate(current);
 409}
 410EXPORT_SYMBOL_GPL(start_thread);
 411
 412static void hard_disable_TSC(void)
 413{
 414        write_cr4(read_cr4() | X86_CR4_TSD);
 415}
 416
 417void disable_TSC(void)
 418{
 419        preempt_disable();
 420        if (!test_and_set_thread_flag(TIF_NOTSC))
 421                /*
 422                 * Must flip the CPU state synchronously with
 423                 * TIF_NOTSC in the current running context.
 424                 */
 425                hard_disable_TSC();
 426        preempt_enable();
 427}
 428
 429static void hard_enable_TSC(void)
 430{
 431        write_cr4(read_cr4() & ~X86_CR4_TSD);
 432}
 433
 434static void enable_TSC(void)
 435{
 436        preempt_disable();
 437        if (test_and_clear_thread_flag(TIF_NOTSC))
 438                /*
 439                 * Must flip the CPU state synchronously with
 440                 * TIF_NOTSC in the current running context.
 441                 */
 442                hard_enable_TSC();
 443        preempt_enable();
 444}
 445
 446int get_tsc_mode(unsigned long adr)
 447{
 448        unsigned int val;
 449
 450        if (test_thread_flag(TIF_NOTSC))
 451                val = PR_TSC_SIGSEGV;
 452        else
 453                val = PR_TSC_ENABLE;
 454
 455        return put_user(val, (unsigned int __user *)adr);
 456}
 457
 458int set_tsc_mode(unsigned int val)
 459{
 460        if (val == PR_TSC_SIGSEGV)
 461                disable_TSC();
 462        else if (val == PR_TSC_ENABLE)
 463                enable_TSC();
 464        else
 465                return -EINVAL;
 466
 467        return 0;
 468}
 469
 470/*
 * Load debug register number r from the debugreg<r> field of the
 * structure that 'thread' points to.  r is token-pasted into the field
 * name, so it must be written as a literal register number.
 */
 473#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 474
 475static inline void __switch_to_xtra(struct task_struct *prev_p,
 476                                    struct task_struct *next_p,
 477                                    struct tss_struct *tss)
 478{
 479        struct thread_struct *prev, *next;
 480
 481        prev = &prev_p->thread,
 482        next = &next_p->thread;
 483
 484        if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
 485            test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
 486                ds_switch_to(prev_p, next_p);
 487        else if (next->debugctlmsr != prev->debugctlmsr)
 488                update_debugctlmsr(next->debugctlmsr);
 489
 490        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 491                loaddebug(next, 0);
 492                loaddebug(next, 1);
 493                loaddebug(next, 2);
 494                loaddebug(next, 3);
 495                /* no 4 and 5 */
 496                loaddebug(next, 6);
 497                loaddebug(next, 7);
 498        }
 499
 500        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 501            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 502                /* prev and next are different */
 503                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
 504                        hard_disable_TSC();
 505                else
 506                        hard_enable_TSC();
 507        }
 508
 509        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 510                /*
 511                 * Copy the relevant range of the IO bitmap.
 512                 * Normally this is 128 bytes or less:
 513                 */
 514                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 515                       max(prev->io_bitmap_max, next->io_bitmap_max));
 516        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
 517                /*
 518                 * Clear any possible leftover bits:
 519                 */
 520                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 521        }
 522}
 523
 524/*
 *      switch_to(x,y) should switch tasks from x to y.
 *
 * Switches the architecture state from prev_p to next_p on the current
 * CPU: sp0 in the TSS, ES/DS, the TLS descriptors, the FS/GS selectors
 * and their 64bit bases, the lazy FPU state of prev_p, the PDA fields
 * (oldrsp, pcurrent, kernelstack and, with CONFIG_CC_STACKPROTECTOR,
 * stack_canary) and, when a context-switch work flag is set on either
 * task, the extra state handled by __switch_to_xtra().
 * Returns prev_p.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported too.
 */
 534__notrace_funcgraph struct task_struct *
 535__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 536{
 537        struct thread_struct *prev = &prev_p->thread;
 538        struct thread_struct *next = &next_p->thread;
 539        int cpu = smp_processor_id();
 540        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 541        unsigned fsindex, gsindex;
 542
 543        /* we're going to use this soon, after a few expensive things */
 544        if (next_p->fpu_counter > 5)
 545                prefetch(next->xstate);
 546
 547        /*
 548         * Reload esp0, LDT and the page table pointer:
 549         */
 550        load_sp0(tss, next);
 551
 552        /*
 553         * Switch DS and ES.
 554         * This won't pick up thread selector changes, but I guess that is ok.
 555         */
 556        savesegment(es, prev->es);
 557        if (unlikely(next->es | prev->es))
 558                loadsegment(es, next->es);
 559
 560        savesegment(ds, prev->ds);
 561        if (unlikely(next->ds | prev->ds))
 562                loadsegment(ds, next->ds);
 563
 564
 565        /* We must save %fs and %gs before load_TLS() because
 566         * %fs and %gs may be cleared by load_TLS().
 567         *
 568         * (e.g. xen_load_tls())
 569         */
 570        savesegment(fs, fsindex);
 571        savesegment(gs, gsindex);
 572
 573        load_TLS(next, cpu);
 574
 575        /*
 576         * Leave lazy mode, flushing any hypercalls made here.
 577         * This must be done before restoring TLS segments so
 578         * the GDT and LDT are properly updated, and must be
 579         * done before math_state_restore, so the TS bit is up
 580         * to date.
 581         */
 582        arch_leave_lazy_cpu_mode();
 583
 584        /*
 585         * Switch FS and GS.
 586         *
 587         * Segment register != 0 always requires a reload.  Also
 588         * reload when it has changed.  When prev process used 64bit
 589         * base always reload to avoid an information leak.
 590         */
 591        if (unlikely(fsindex | next->fsindex | prev->fs)) {
 592                loadsegment(fs, next->fsindex);
 593                /*
 594                 * Check if the user used a selector != 0; if yes
 595                 *  clear 64bit base, since overloaded base is always
 596                 *  mapped to the Null selector
 597                 */
 598                if (fsindex)
 599                        prev->fs = 0;
 600        }
 601        /* when next process has a 64bit base use it */
 602        if (next->fs)
 603                wrmsrl(MSR_FS_BASE, next->fs);
 604        prev->fsindex = fsindex;
 605
        /* same scheme for GS; the base of the incoming task goes to MSR_KERNEL_GS_BASE */
 606        if (unlikely(gsindex | next->gsindex | prev->gs)) {
 607                load_gs_index(next->gsindex);
 608                if (gsindex)
 609                        prev->gs = 0;
 610        }
 611        if (next->gs)
 612                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 613        prev->gsindex = gsindex;
 614
 615        /* Must be after DS reload */
 616        unlazy_fpu(prev_p);
 617
 618        /*
 619         * Switch the PDA and FPU contexts.
 620         */
 621        prev->usersp = read_pda(oldrsp);
 622        write_pda(oldrsp, next->usersp);
 623        write_pda(pcurrent, next_p);
 624
 625        write_pda(kernelstack,
 626                  (unsigned long)task_stack_page(next_p) +
 627                  THREAD_SIZE - PDA_STACKOFFSET);
 628#ifdef CONFIG_CC_STACKPROTECTOR
 629        write_pda(stack_canary, next_p->stack_canary);
 630        /*
 631         * Build time only check to make sure the stack_canary is at
 632         * offset 40 in the pda; this is a gcc ABI requirement
 633         */
 634        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
 635#endif
 636
 637        /*
 638         * Now maybe reload the debug registers and handle I/O bitmaps
 639         */
 640        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
 641                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 642                __switch_to_xtra(prev_p, next_p, tss);
 643
 644        /* If the task has used fpu the last 5 timeslices, just do a full
 645         * restore of the math state immediately to avoid the trap; the
 646         * chances of needing FPU soon are obviously high now
 647         *
 648         * tsk_used_math() checks prevent calling math_state_restore(),
 649         * which can sleep in the case of !tsk_used_math()
 650         */
 651        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
 652                math_state_restore();
 653        return prev_p;
 654}
 655
 656/*
 657 * sys_execve() executes a new program.
 658 */
 659asmlinkage
 660long sys_execve(char __user *name, char __user * __user *argv,
 661                char __user * __user *envp, struct pt_regs *regs)
 662{
 663        long error;
 664        char *filename;
 665
 666        filename = getname(name);
 667        error = PTR_ERR(filename);
 668        if (IS_ERR(filename))
 669                return error;
 670        error = do_execve(filename, argv, envp, regs);
 671        putname(filename);
 672        return error;
 673}
 674
 675void set_personality_64bit(void)
 676{
 677        /* inherit personality from parent */
 678
 679        /* Make sure to be in 64bit mode */
 680        clear_thread_flag(TIF_IA32);
 681
 682        /* TBD: overwrites user setup. Should have two bits.
 683           But 64bit processes have always behaved this way,
 684           so it's not too bad. The main problem is just that
 685           32bit childs are affected again. */
 686        current->personality &= ~READ_IMPLIES_EXEC;
 687}
 688
 689asmlinkage long sys_fork(struct pt_regs *regs)
 690{
 691        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 692}
 693
 694asmlinkage long
 695sys_clone(unsigned long clone_flags, unsigned long newsp,
 696          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 697{
 698        if (!newsp)
 699                newsp = regs->sp;
 700        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 701}
 702
 703/*
 704 * This is trivial, and on the face of it looks like it
 705 * could equally well be done in user mode.
 706 *
 707 * Not so, for quite unobvious reasons - register pressure.
 708 * In user mode vfork() cannot have a stack frame, and if
 709 * done by calling the "clone()" system call directly, you
 710 * do not have enough call-clobbered registers to hold all
 711 * the information you need.
 712 */
 713asmlinkage long sys_vfork(struct pt_regs *regs)
 714{
 715        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 716                    NULL, NULL);
 717}
 718
 719unsigned long get_wchan(struct task_struct *p)
 720{
 721        unsigned long stack;
 722        u64 fp, ip;
 723        int count = 0;
 724
 725        if (!p || p == current || p->state == TASK_RUNNING)
 726                return 0;
 727        stack = (unsigned long)task_stack_page(p);
 728        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
 729                return 0;
 730        fp = *(u64 *)(p->thread.sp);
 731        do {
 732                if (fp < (unsigned long)stack ||
 733                    fp >= (unsigned long)stack+THREAD_SIZE)
 734                        return 0;
 735                ip = *(u64 *)(fp+8);
 736                if (!in_sched_functions(ip))
 737                        return ip;
 738                fp = *(u64 *)fp;
 739        } while (count++ < 16);
 740        return 0;
 741}
 742
/*
 * Get or set the FS/GS base of @task.
 *
 * @task: task to operate on; CPU registers/MSRs are only touched when
 *        @task is the current task ("doit"), otherwise only the saved
 *        thread state is read or written.
 * @code: ARCH_SET_GS, ARCH_SET_FS, ARCH_GET_FS or ARCH_GET_GS.
 * @addr: for the SET codes, the new base address; for the GET codes, a
 *        user-space pointer to an unsigned long that receives the base.
 *
 * SET: bases that fit in 32 bits are installed as a TLS descriptor and
 * selected through FS_TLS_SEL/GS_TLS_SEL (thread.fs/gs set to 0); larger
 * bases are stored in thread.fs/gs with a 0 selector and written to the
 * base MSR.
 *
 * Returns 0 on success, -EPERM when a SET address is >= TASK_SIZE_OF(task),
 * -EINVAL for an unknown @code, or whatever checking_wrmsrl()/put_user()
 * returned.
 */
 743long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 744{
 745        int ret = 0;
 746        int doit = task == current;
 747        int cpu;
 748
 749        switch (code) {
 750        case ARCH_SET_GS:
 751                if (addr >= TASK_SIZE_OF(task))
 752                        return -EPERM;
 753                cpu = get_cpu();
 754                /* handle small bases via the GDT because that's faster to
 755                   switch. */
 756                if (addr <= 0xffffffff) {
 757                        set_32bit_tls(task, GS_TLS, addr);
 758                        if (doit) {
 759                                load_TLS(&task->thread, cpu);
 760                                load_gs_index(GS_TLS_SEL);
 761                        }
 762                        task->thread.gsindex = GS_TLS_SEL;
 763                        task->thread.gs = 0;
 764                } else {
 765                        task->thread.gsindex = 0;
 766                        task->thread.gs = addr;
 767                        if (doit) {
 768                                load_gs_index(0);
 769                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 770                        }
 771                }
 772                put_cpu();
 773                break;
 774        case ARCH_SET_FS:
 775                /* Not strictly needed for fs, but do it for symmetry
 776                   with gs */
 777                if (addr >= TASK_SIZE_OF(task))
 778                        return -EPERM;
 779                cpu = get_cpu();
 780                /* handle small bases via the GDT because that's faster to
 781                   switch. */
 782                if (addr <= 0xffffffff) {
 783                        set_32bit_tls(task, FS_TLS, addr);
 784                        if (doit) {
 785                                load_TLS(&task->thread, cpu);
 786                                loadsegment(fs, FS_TLS_SEL);
 787                        }
 788                        task->thread.fsindex = FS_TLS_SEL;
 789                        task->thread.fs = 0;
 790                } else {
 791                        task->thread.fsindex = 0;
 792                        task->thread.fs = addr;
 793                        if (doit) {
 794                                /* set the selector to 0 to not confuse
 795                                   __switch_to */
 796                                loadsegment(fs, 0);
 797                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
 798                        }
 799                }
 800                put_cpu();
 801                break;
 802        case ARCH_GET_FS: {
 803                unsigned long base;
                /* small base: read it back from the TLS descriptor */
 804                if (task->thread.fsindex == FS_TLS_SEL)
 805                        base = read_32bit_tls(task, FS_TLS);
 806                else if (doit)
 807                        rdmsrl(MSR_FS_BASE, base);
 808                else
 809                        base = task->thread.fs;
 810                ret = put_user(base, (unsigned long __user *)addr);
 811                break;
 812        }
 813        case ARCH_GET_GS: {
 814                unsigned long base;
 815                unsigned gsindex;
 816                if (task->thread.gsindex == GS_TLS_SEL)
 817                        base = read_32bit_tls(task, GS_TLS);
 818                else if (doit) {
                        /* the live selector decides between the MSR and the saved base */
 819                        savesegment(gs, gsindex);
 820                        if (gsindex)
 821                                rdmsrl(MSR_KERNEL_GS_BASE, base);
 822                        else
 823                                base = task->thread.gs;
 824                } else
 825                        base = task->thread.gs;
 826                ret = put_user(base, (unsigned long __user *)addr);
 827                break;
 828        }
 829
 830        default:
 831                ret = -EINVAL;
 832                break;
 833        }
 834
 835        return ret;
 836}
 837
 838long sys_arch_prctl(int code, unsigned long addr)
 839{
 840        return do_arch_prctl(current, code, addr);
 841}
 842
 843unsigned long arch_align_stack(unsigned long sp)
 844{
 845        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 846                sp -= get_random_int() % 8192;
 847        return sp & ~0xf;
 848}
 849
 850unsigned long arch_randomize_brk(struct mm_struct *mm)
 851{
 852        unsigned long range_end = mm->brk + 0x02000000;
 853        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 854}
 855
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.