linux/arch/x86/kernel/process_64.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1995  Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *      Gareth Hughes <gareth@valinux.com>, May 2000
   6 *
   7 *  X86-64 port
   8 *      Andi Kleen.
   9 *
  10 *      CPU hotplug support - ashok.raj@intel.com
  11 */
  12
  13/*
   14 * This file handles the architecture-dependent parts of process handling.
  15 */
  16
  17#include <stdarg.h>
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/fs.h>
  23#include <linux/kernel.h>
  24#include <linux/mm.h>
  25#include <linux/elfcore.h>
  26#include <linux/smp.h>
  27#include <linux/slab.h>
  28#include <linux/user.h>
  29#include <linux/interrupt.h>
  30#include <linux/utsname.h>
  31#include <linux/delay.h>
  32#include <linux/module.h>
  33#include <linux/ptrace.h>
  34#include <linux/random.h>
  35#include <linux/notifier.h>
  36#include <linux/kprobes.h>
  37#include <linux/kdebug.h>
  38#include <linux/tick.h>
  39#include <linux/prctl.h>
  40
  41#include <asm/uaccess.h>
  42#include <asm/pgtable.h>
  43#include <asm/system.h>
  44#include <asm/io.h>
  45#include <asm/processor.h>
  46#include <asm/i387.h>
  47#include <asm/mmu_context.h>
  48#include <asm/pda.h>
  49#include <asm/prctl.h>
  50#include <asm/desc.h>
  51#include <asm/proto.h>
  52#include <asm/ia32.h>
  53#include <asm/idle.h>
  54
   /* Declared here; the definition is not in this file. */
   55asmlinkage extern void ret_from_fork(void);
   56
   /*
    * Flag mask CLONE_VM | CLONE_UNTRACED.
    * NOTE(review): presumably applied when creating kernel threads - the
    * users of this variable are not in this file; confirm.
    */
   57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
   58
   /*
    * Notifier chain that enter_idle() calls with IDLE_START and
    * __exit_idle() calls with IDLE_END.
    */
   59static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  60
   /* Add notifier block @n to the idle_notifier chain. */
   61void idle_notifier_register(struct notifier_block *n)
   62{
   63        atomic_notifier_chain_register(&idle_notifier, n);
   64}
  65
   /*
    * Mark the idle state as entered: write 1 to the PDA "isidle" field, then
    * call the idle_notifier chain with IDLE_START.
    */
   66void enter_idle(void)
   67{
   68        write_pda(isidle, 1);
   69        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
   70}
  71
  72static void __exit_idle(void)
  73{
  74        if (test_and_clear_bit_pda(0, isidle) == 0)
  75                return;
  76        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
  77}
  78
  79/* Called from interrupts to signify idle end */
  80void exit_idle(void)
  81{
  82        /* idle loop has pid 0 */
  83        if (current->pid)
  84                return;
  85        __exit_idle();
  86}
  87
   /* play_dead() comes in two variants, selected by CONFIG_HOTPLUG_CPU. */
   88#ifdef CONFIG_HOTPLUG_CPU
   /* Per-CPU state variable; play_dead() below sets it to CPU_DEAD. */
   89DECLARE_PER_CPU(int, cpu_state);
   90
   91#include <asm/nmi.h>
   92/* We halt the CPU with physical CPU hotplug */
   /*
    * Sequence: idle_task_exit(), c1e_remove_cpu() for this CPU, a memory
    * barrier, publish CPU_DEAD in cpu_state, disable local interrupts and
    * finally wbinvd_halt().
    * NOTE(review): presumably never returns, since wbinvd_halt() is expected
    * to halt the CPU - confirm against its definition.
    */
   93static inline void play_dead(void)
   94{
   95        idle_task_exit();
   96        c1e_remove_cpu(raw_smp_processor_id());
   97
   98        mb();
   99        /* Ack it */
  100        __get_cpu_var(cpu_state) = CPU_DEAD;
  101
  102        local_irq_disable();
  103        /* mask all interrupts, flush any and all caches, and halt */
  104        wbinvd_halt();
  105}
  106#else
  /* Without CONFIG_HOTPLUG_CPU, calling play_dead() is treated as a bug. */
  107static inline void play_dead(void)
  108{
  109        BUG();
  110}
  111#endif /* CONFIG_HOTPLUG_CPU */
 112
  113/*
  114 * The idle thread. There's no useful work to be
  115 * done, so just try to conserve power and have a
  116 * low exit latency (ie sit in a loop waiting for
  117 * somebody to say that they'd like to reschedule)
 *
 * The function sets TS_POLLING in the thread-info status once and then
 * loops forever (there is no return path): it stops the scheduler tick,
 * idles through pm_idle() until need_resched() becomes true, restarts the
 * tick and calls schedule().
  118 */
  119void cpu_idle(void)
  120{
  121        current_thread_info()->status |= TS_POLLING;
  122        /* endless idle loop with no priority at all */
  123        while (1) {
  124                tick_nohz_stop_sched_tick(1);
  125                while (!need_resched()) {
  126
  127                        rmb();
  128
                        /* An offlined CPU leaves the idle loop via play_dead(). */
  129                        if (cpu_is_offline(smp_processor_id()))
  130                                play_dead();
  131                        /*
  132                         * Idle routines should keep interrupts disabled
  133                         * from here on, until they go to idle.
  134                         * Otherwise, idle callbacks can misfire.
  135                         */
  136                        local_irq_disable();
  137                        enter_idle();
  138                        /* Don't trace irqs off for idle */
  139                        stop_critical_timings();
  140                        pm_idle();
  141                        start_critical_timings();
  142                        /* In many cases the interrupt that ended idle
  143                           has already called exit_idle. But some idle
  144                           loops can be woken up without interrupt. */
  145                        __exit_idle();
  146                }
  147
  148                tick_nohz_restart_sched_tick();
                /*
                 * NOTE(review): the enable/disable pair around schedule()
                 * suggests this loop otherwise runs with preemption
                 * disabled - confirm against the caller.
                 */
  149                preempt_enable_no_resched();
  150                schedule();
  151                preempt_disable();
  152        }
  153}
 154
  155/* Prints also some state that isn't saved in the pt_regs */
  /*
   * Dump via printk(): the module list, the current task's pid/comm with
   * taint and kernel version strings, the general registers saved in @regs,
   * and live CPU state read at the time of the call (segment selectors,
   * FS/GS/kernel-GS base MSRs, CR0/CR2/CR3/CR4 and DR0-DR3, DR6, DR7).
   *
   * @regs: saved register frame to print.
   */
  156void __show_regs(struct pt_regs * regs)
  157{
  158        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
  159        unsigned long d0, d1, d2, d3, d6, d7;
  160        unsigned int fsindex, gsindex;
  161        unsigned int ds, cs, es;
  162
  163        printk("\n");
  164        print_modules();
          /* "%.*s" prints the version string only up to its first space. */
  165        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
  166                current->pid, current->comm, print_tainted(),
  167                init_utsname()->release,
  168                (int)strcspn(init_utsname()->version, " "),
  169                init_utsname()->version);
  170        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
  171        printk_address(regs->ip, 1);
  172        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
  173                regs->flags);
  174        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
  175               regs->ax, regs->bx, regs->cx);
  176        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
  177               regs->dx, regs->si, regs->di);
  178        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
  179               regs->bp, regs->r8, regs->r9);
  180        printk("R10: %016lx R11: %016lx R12: %016lx\n",
  181               regs->r10, regs->r11, regs->r12); 
  182        printk("R13: %016lx R14: %016lx R15: %016lx\n",
  183               regs->r13, regs->r14, regs->r15); 
  184
          /* Segment selectors are read from the CPU, not from @regs. */
  185        asm("movl %%ds,%0" : "=r" (ds)); 
  186        asm("movl %%cs,%0" : "=r" (cs)); 
  187        asm("movl %%es,%0" : "=r" (es)); 
  188        asm("movl %%fs,%0" : "=r" (fsindex));
  189        asm("movl %%gs,%0" : "=r" (gsindex));
  190
  191        rdmsrl(MSR_FS_BASE, fs);
  192        rdmsrl(MSR_GS_BASE, gs); 
  193        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
  194
  195        cr0 = read_cr0();
  196        cr2 = read_cr2();
  197        cr3 = read_cr3();
  198        cr4 = read_cr4();
  199
  200        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
  201               fs,fsindex,gs,gsindex,shadowgs); 
  202        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
  203        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
  204
          /* Debug registers 4 and 5 are not read. */
  205        get_debugreg(d0, 0);
  206        get_debugreg(d1, 1);
  207        get_debugreg(d2, 2);
  208        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
  209        get_debugreg(d3, 3);
  210        get_debugreg(d6, 6);
  211        get_debugreg(d7, 7);
  212        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
  213}
 214
 215void show_regs(struct pt_regs *regs)
 216{
 217        printk("CPU %d:", smp_processor_id());
 218        __show_regs(regs);
 219        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 220}
 221
 222/*
 223 * Free current thread data structures etc..
 224 */
 225void exit_thread(void)
 226{
 227        struct task_struct *me = current;
 228        struct thread_struct *t = &me->thread;
 229
 230        if (me->thread.io_bitmap_ptr) {
 231                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 232
 233                kfree(t->io_bitmap_ptr);
 234                t->io_bitmap_ptr = NULL;
 235                clear_thread_flag(TIF_IO_BITMAP);
 236                /*
 237                 * Careful, clear this in the TSS too:
 238                 */
 239                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
 240                t->io_bitmap_max = 0;
 241                put_cpu();
 242        }
 243}
 244
 245void flush_thread(void)
 246{
 247        struct task_struct *tsk = current;
 248
 249        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
 250                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
 251                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
 252                        clear_tsk_thread_flag(tsk, TIF_IA32);
 253                } else {
 254                        set_tsk_thread_flag(tsk, TIF_IA32);
 255                        current_thread_info()->status |= TS_COMPAT;
 256                }
 257        }
 258        clear_tsk_thread_flag(tsk, TIF_DEBUG);
 259
 260        tsk->thread.debugreg0 = 0;
 261        tsk->thread.debugreg1 = 0;
 262        tsk->thread.debugreg2 = 0;
 263        tsk->thread.debugreg3 = 0;
 264        tsk->thread.debugreg6 = 0;
 265        tsk->thread.debugreg7 = 0;
 266        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 267        /*
 268         * Forget coprocessor state..
 269         */
 270        tsk->fpu_counter = 0;
 271        clear_fpu(tsk);
 272        clear_used_math();
 273}
 274
 275void release_thread(struct task_struct *dead_task)
 276{
 277        if (dead_task->mm) {
 278                if (dead_task->mm->context.size) {
 279                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
 280                                        dead_task->comm,
 281                                        dead_task->mm->context.ldt,
 282                                        dead_task->mm->context.size);
 283                        BUG();
 284                }
 285        }
 286}
 287
 288static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 289{
 290        struct user_desc ud = {
 291                .base_addr = addr,
 292                .limit = 0xfffff,
 293                .seg_32bit = 1,
 294                .limit_in_pages = 1,
 295                .useable = 1,
 296        };
 297        struct desc_struct *desc = t->thread.tls_array;
 298        desc += tls;
 299        fill_ldt(desc, &ud);
 300}
 301
  /* Return the base address stored in TLS descriptor slot @tls of task @t. */
  302static inline u32 read_32bit_tls(struct task_struct *t, int tls)
  303{
  304        return get_desc_base(&t->thread.tls_array[tls]);
  305}
 306
  307/*
  308 * This gets called before we allocate a new thread and copy
  309 * the current task into it.
 *
 * The only action is unlazy_fpu() on @tsk.
 * NOTE(review): presumably this saves the task's live FPU state to memory
 * so the copy sees it - confirm against unlazy_fpu().
  310 */
  311void prepare_to_copy(struct task_struct *tsk)
  312{
  313        unlazy_fpu(tsk);
  314}
 315
 316int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 317                unsigned long unused,
 318        struct task_struct * p, struct pt_regs * regs)
 319{
 320        int err;
 321        struct pt_regs * childregs;
 322        struct task_struct *me = current;
 323
 324        childregs = ((struct pt_regs *)
 325                        (THREAD_SIZE + task_stack_page(p))) - 1;
 326        *childregs = *regs;
 327
 328        childregs->ax = 0;
 329        childregs->sp = sp;
 330        if (sp == ~0UL)
 331                childregs->sp = (unsigned long)childregs;
 332
 333        p->thread.sp = (unsigned long) childregs;
 334        p->thread.sp0 = (unsigned long) (childregs+1);
 335        p->thread.usersp = me->thread.usersp;
 336
 337        set_tsk_thread_flag(p, TIF_FORK);
 338
 339        p->thread.fs = me->thread.fs;
 340        p->thread.gs = me->thread.gs;
 341
 342        savesegment(gs, p->thread.gsindex);
 343        savesegment(fs, p->thread.fsindex);
 344        savesegment(es, p->thread.es);
 345        savesegment(ds, p->thread.ds);
 346
 347        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 348                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 349                if (!p->thread.io_bitmap_ptr) {
 350                        p->thread.io_bitmap_max = 0;
 351                        return -ENOMEM;
 352                }
 353                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 354                                IO_BITMAP_BYTES);
 355                set_tsk_thread_flag(p, TIF_IO_BITMAP);
 356        }
 357
 358        /*
 359         * Set a new TLS for the child thread?
 360         */
 361        if (clone_flags & CLONE_SETTLS) {
 362#ifdef CONFIG_IA32_EMULATION
 363                if (test_thread_flag(TIF_IA32))
 364                        err = do_set_thread_area(p, -1,
 365                                (struct user_desc __user *)childregs->si, 0);
 366                else                    
 367#endif   
 368                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
 369                if (err) 
 370                        goto out;
 371        }
 372        err = 0;
 373out:
 374        if (err && p->thread.io_bitmap_ptr) {
 375                kfree(p->thread.io_bitmap_ptr);
 376                p->thread.io_bitmap_max = 0;
 377        }
 378        return err;
 379}
 380
  /*
   * Reset the current task's register state so that it starts at @new_ip
   * with stack pointer @new_sp.
   *
   * FS, ES, DS and the GS index are loaded with 0; @regs gets the new
   * ip/sp, the user code and data selectors and flags 0x200; the PDA
   * "oldrsp" field is set to @new_sp; the address limit becomes USER_DS;
   * finally the task's extended (FP) state is freed.
   *
   * NOTE(review): 0x200 is presumably the interrupt-enable bit of EFLAGS -
   * confirm.
   */
  381void
  382start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
  383{
  384        loadsegment(fs, 0);
  385        loadsegment(es, 0);
  386        loadsegment(ds, 0);
  387        load_gs_index(0);
  388        regs->ip                = new_ip;
  389        regs->sp                = new_sp;
  390        write_pda(oldrsp, new_sp);
  391        regs->cs                = __USER_CS;
  392        regs->ss                = __USER_DS;
  393        regs->flags             = 0x200;
  394        set_fs(USER_DS);
  395        /*
  396         * Free the old FP and other extended state
  397         */
  398        free_thread_xstate(current);
  399}
  400EXPORT_SYMBOL_GPL(start_thread);
 401
 402static void hard_disable_TSC(void)
 403{
 404        write_cr4(read_cr4() | X86_CR4_TSD);
 405}
 406
 407void disable_TSC(void)
 408{
 409        preempt_disable();
 410        if (!test_and_set_thread_flag(TIF_NOTSC))
 411                /*
 412                 * Must flip the CPU state synchronously with
 413                 * TIF_NOTSC in the current running context.
 414                 */
 415                hard_disable_TSC();
 416        preempt_enable();
 417}
 418
 419static void hard_enable_TSC(void)
 420{
 421        write_cr4(read_cr4() & ~X86_CR4_TSD);
 422}
 423
 424static void enable_TSC(void)
 425{
 426        preempt_disable();
 427        if (test_and_clear_thread_flag(TIF_NOTSC))
 428                /*
 429                 * Must flip the CPU state synchronously with
 430                 * TIF_NOTSC in the current running context.
 431                 */
 432                hard_enable_TSC();
 433        preempt_enable();
 434}
 435
 436int get_tsc_mode(unsigned long adr)
 437{
 438        unsigned int val;
 439
 440        if (test_thread_flag(TIF_NOTSC))
 441                val = PR_TSC_SIGSEGV;
 442        else
 443                val = PR_TSC_ENABLE;
 444
 445        return put_user(val, (unsigned int __user *)adr);
 446}
 447
 448int set_tsc_mode(unsigned int val)
 449{
 450        if (val == PR_TSC_SIGSEGV)
 451                disable_TSC();
 452        else if (val == PR_TSC_ENABLE)
 453                enable_TSC();
 454        else
 455                return -EINVAL;
 456
 457        return 0;
 458}
 459
 460/*
 461 * This special macro can be used to load a debugging register
 462 */
 463#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 464
 465static inline void __switch_to_xtra(struct task_struct *prev_p,
 466                                    struct task_struct *next_p,
 467                                    struct tss_struct *tss)
 468{
 469        struct thread_struct *prev, *next;
 470        unsigned long debugctl;
 471
 472        prev = &prev_p->thread,
 473        next = &next_p->thread;
 474
 475        debugctl = prev->debugctlmsr;
 476        if (next->ds_area_msr != prev->ds_area_msr) {
 477                /* we clear debugctl to make sure DS
 478                 * is not in use when we change it */
 479                debugctl = 0;
 480                update_debugctlmsr(0);
 481                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
 482        }
 483
 484        if (next->debugctlmsr != debugctl)
 485                update_debugctlmsr(next->debugctlmsr);
 486
 487        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 488                loaddebug(next, 0);
 489                loaddebug(next, 1);
 490                loaddebug(next, 2);
 491                loaddebug(next, 3);
 492                /* no 4 and 5 */
 493                loaddebug(next, 6);
 494                loaddebug(next, 7);
 495        }
 496
 497        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 498            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 499                /* prev and next are different */
 500                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
 501                        hard_disable_TSC();
 502                else
 503                        hard_enable_TSC();
 504        }
 505
 506        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 507                /*
 508                 * Copy the relevant range of the IO bitmap.
 509                 * Normally this is 128 bytes or less:
 510                 */
 511                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 512                       max(prev->io_bitmap_max, next->io_bitmap_max));
 513        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
 514                /*
 515                 * Clear any possible leftover bits:
 516                 */
 517                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 518        }
 519
 520#ifdef X86_BTS
 521        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 522                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 523
 524        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 525                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
 526#endif
 527}
 528
  529/*
  530 *      switch_to(x,y) should switch tasks from x to y.
  531 *
  532 * This could still be optimized:
  533 * - fold all the options into a flag word and test it with a single test.
  534 * - could test fs/gs bitsliced
  535 *
  536 * Kprobes not supported here. Set the probe on schedule instead.
 *
 * @prev_p: task being switched away from
 * @next_p: task being switched to
 *
 * Returns @prev_p.
  537 */
  538struct task_struct *
  539__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  540{
  541        struct thread_struct *prev = &prev_p->thread;
  542        struct thread_struct *next = &next_p->thread;
  543        int cpu = smp_processor_id();
  544        struct tss_struct *tss = &per_cpu(init_tss, cpu);
  545        unsigned fsindex, gsindex;
  546
  547        /* we're going to use this soon, after a few expensive things */
  548        if (next_p->fpu_counter>5)
  549                prefetch(next->xstate);
  550
  551        /*
  552         * Reload esp0, LDT and the page table pointer:
         *
         * NOTE(review): only load_sp0() is called at this point; no LDT
         * or page-table reload is visible in this function.
  553         */
  554        load_sp0(tss, next);
  555
  556        /* 
  557         * Switch DS and ES.
  558         * This won't pick up thread selector changes, but I guess that is ok.
  559         */
  560        savesegment(es, prev->es);
  561        if (unlikely(next->es | prev->es))
  562                loadsegment(es, next->es); 
  563
  564        savesegment(ds, prev->ds);
  565        if (unlikely(next->ds | prev->ds))
  566                loadsegment(ds, next->ds);
  567
  568
  569        /* We must save %fs and %gs before load_TLS() because
  570         * %fs and %gs may be cleared by load_TLS().
  571         *
  572         * (e.g. xen_load_tls())
  573         */
  574        savesegment(fs, fsindex);
  575        savesegment(gs, gsindex);
  576
  577        load_TLS(next, cpu);
  578
  579        /*
  580         * Leave lazy mode, flushing any hypercalls made here.
  581         * This must be done before restoring TLS segments so
  582         * the GDT and LDT are properly updated, and must be
  583         * done before math_state_restore, so the TS bit is up
  584         * to date.
  585         */
  586        arch_leave_lazy_cpu_mode();
  587
  588        /* 
  589         * Switch FS and GS.
  590         *
  591         * Segment register != 0 always requires a reload.  Also
  592         * reload when it has changed.  When prev process used 64bit
  593         * base always reload to avoid an information leak.
  594         */
  595        if (unlikely(fsindex | next->fsindex | prev->fs)) {
  596                loadsegment(fs, next->fsindex);
  597                /* 
  598                 * Check if the user used a selector != 0; if yes
  599                 *  clear 64bit base, since overloaded base is always
  600                 *  mapped to the Null selector
  601                 */
  602                if (fsindex)
  603                        prev->fs = 0;                           
  604        }
  605        /* when next process has a 64bit base use it */
  606        if (next->fs)
  607                wrmsrl(MSR_FS_BASE, next->fs);
  608        prev->fsindex = fsindex;
  609
          /* Same scheme for GS; the base is written to MSR_KERNEL_GS_BASE. */
  610        if (unlikely(gsindex | next->gsindex | prev->gs)) {
  611                load_gs_index(next->gsindex);
  612                if (gsindex)
  613                        prev->gs = 0;                           
  614        }
  615        if (next->gs)
  616                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
  617        prev->gsindex = gsindex;
  618
  619        /* Must be after DS reload */
  620        unlazy_fpu(prev_p);
  621
  622        /* 
  623         * Switch the PDA and FPU contexts.
  624         */
  625        prev->usersp = read_pda(oldrsp);
  626        write_pda(oldrsp, next->usersp);
  627        write_pda(pcurrent, next_p); 
  628
  629        write_pda(kernelstack,
  630                  (unsigned long)task_stack_page(next_p) +
  631                  THREAD_SIZE - PDA_STACKOFFSET);
  632#ifdef CONFIG_CC_STACKPROTECTOR
  633        write_pda(stack_canary, next_p->stack_canary);
  634        /*
  635         * Build time only check to make sure the stack_canary is at
  636         * offset 40 in the pda; this is a gcc ABI requirement
  637         */
  638        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
  639#endif
  640
  641        /*
  642         * Now maybe reload the debug registers and handle I/O bitmaps
  643         */
  644        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
  645                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
  646                __switch_to_xtra(prev_p, next_p, tss);
  647
  648        /* If the task has used fpu the last 5 timeslices, just do a full
  649         * restore of the math state immediately to avoid the trap; the
  650         * chances of needing FPU soon are obviously high now
  651         *
  652         * tsk_used_math() checks prevent calling math_state_restore(),
  653         * which can sleep in the case of !tsk_used_math()
  654         */
  655        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
  656                math_state_restore();
  657        return prev_p;
  658}
 659
 660/*
 661 * sys_execve() executes a new program.
 662 */
 663asmlinkage
 664long sys_execve(char __user *name, char __user * __user *argv,
 665                char __user * __user *envp, struct pt_regs *regs)
 666{
 667        long error;
 668        char * filename;
 669
 670        filename = getname(name);
 671        error = PTR_ERR(filename);
 672        if (IS_ERR(filename))
 673                return error;
 674        error = do_execve(filename, argv, envp, regs);
 675        putname(filename);
 676        return error;
 677}
 678
 679void set_personality_64bit(void)
 680{
 681        /* inherit personality from parent */
 682
 683        /* Make sure to be in 64bit mode */
 684        clear_thread_flag(TIF_IA32);
 685
 686        /* TBD: overwrites user setup. Should have two bits.
 687           But 64bit processes have always behaved this way,
 688           so it's not too bad. The main problem is just that
 689           32bit childs are affected again. */
 690        current->personality &= ~READ_IMPLIES_EXEC;
 691}
 692
  /*
   * fork entry point: calls do_fork() with SIGCHLD as the only flag and
   * the caller's stack pointer taken from @regs.  Returns the do_fork()
   * result.
   */
  693asmlinkage long sys_fork(struct pt_regs *regs)
  694{
  695        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
  696}
 697
 698asmlinkage long
 699sys_clone(unsigned long clone_flags, unsigned long newsp,
 700          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 701{
 702        if (!newsp)
 703                newsp = regs->sp;
 704        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 705}
 706
 707/*
 708 * This is trivial, and on the face of it looks like it
 709 * could equally well be done in user mode.
 710 *
 711 * Not so, for quite unobvious reasons - register pressure.
 712 * In user mode vfork() cannot have a stack frame, and if
 713 * done by calling the "clone()" system call directly, you
 714 * do not have enough call-clobbered registers to hold all
 715 * the information you need.
 716 */
 717asmlinkage long sys_vfork(struct pt_regs *regs)
 718{
 719        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 720                    NULL, NULL);
 721}
 722
 723unsigned long get_wchan(struct task_struct *p)
 724{
 725        unsigned long stack;
 726        u64 fp,ip;
 727        int count = 0;
 728
 729        if (!p || p == current || p->state==TASK_RUNNING)
 730                return 0; 
 731        stack = (unsigned long)task_stack_page(p);
 732        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
 733                return 0;
 734        fp = *(u64 *)(p->thread.sp);
 735        do { 
 736                if (fp < (unsigned long)stack ||
 737                    fp >= (unsigned long)stack+THREAD_SIZE)
 738                        return 0; 
 739                ip = *(u64 *)(fp+8);
 740                if (!in_sched_functions(ip))
 741                        return ip;
 742                fp = *(u64 *)fp; 
 743        } while (count++ < 16); 
 744        return 0;
 745}
 746
  /*
   * Get or set the FS/GS segment base of @task.
   *
   * @task: task to operate on; CPU state (segment registers, MSRs, TLS) is
   *        only touched when @task is the current task ("doit")
   * @code: ARCH_SET_GS, ARCH_SET_FS, ARCH_GET_FS or ARCH_GET_GS
   * @addr: new base for the SET codes; for the GET codes, the user-space
   *        address of an unsigned long that receives the base
   *
   * Returns 0 on success, -EPERM when a SET address is >= TASK_SIZE_OF(task),
   * -EINVAL for an unknown @code, otherwise the result of checking_wrmsrl()
   * or put_user().
   */
  747long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
  748{ 
  749        int ret = 0; 
  750        int doit = task == current;
  751        int cpu;
  752
  753        switch (code) { 
  754        case ARCH_SET_GS:
  755                if (addr >= TASK_SIZE_OF(task))
  756                        return -EPERM; 
  757                cpu = get_cpu();
  758                /* handle small bases via the GDT because that's faster to 
  759                   switch. */
  760                if (addr <= 0xffffffff) {  
  761                        set_32bit_tls(task, GS_TLS, addr); 
  762                        if (doit) { 
  763                                load_TLS(&task->thread, cpu);
  764                                load_gs_index(GS_TLS_SEL); 
  765                        }
  766                        task->thread.gsindex = GS_TLS_SEL; 
  767                        task->thread.gs = 0;
  768                } else { 
                          /* Large base: null selector plus a 64-bit base in the MSR. */
  769                        task->thread.gsindex = 0;
  770                        task->thread.gs = addr;
  771                        if (doit) {
  772                                load_gs_index(0);
  773                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
  774                        } 
  775                }
  776                put_cpu();
  777                break;
  778        case ARCH_SET_FS:
  779                /* Not strictly needed for fs, but do it for symmetry
  780                   with gs */
  781                if (addr >= TASK_SIZE_OF(task))
  782                        return -EPERM;
  783                cpu = get_cpu();
  784                /* handle small bases via the GDT because that's faster to
  785                   switch. */
  786                if (addr <= 0xffffffff) {
  787                        set_32bit_tls(task, FS_TLS, addr);
  788                        if (doit) {
  789                                load_TLS(&task->thread, cpu);
  790                                loadsegment(fs, FS_TLS_SEL);
  791                        }
  792                        task->thread.fsindex = FS_TLS_SEL;
  793                        task->thread.fs = 0;
  794                } else {
  795                        task->thread.fsindex = 0;
  796                        task->thread.fs = addr;
  797                        if (doit) {
  798                                /* set the selector to 0 to not confuse
  799                                   __switch_to */
  800                                loadsegment(fs, 0);
  801                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
  802                        }
  803                }
  804                put_cpu();
  805                break;
  806        case ARCH_GET_FS: {
  807                unsigned long base;
                  /*
                   * Source of the base: the TLS descriptor when the TLS
                   * selector is in use, the live MSR for the current task,
                   * otherwise the value saved in the thread struct.
                   */
  808                if (task->thread.fsindex == FS_TLS_SEL)
  809                        base = read_32bit_tls(task, FS_TLS);
  810                else if (doit)
  811                        rdmsrl(MSR_FS_BASE, base);
  812                else
  813                        base = task->thread.fs;
  814                ret = put_user(base, (unsigned long __user *)addr);
  815                break;
  816        }
  817        case ARCH_GET_GS: {
  818                unsigned long base;
  819                unsigned gsindex;
  820                if (task->thread.gsindex == GS_TLS_SEL)
  821                        base = read_32bit_tls(task, GS_TLS);
  822                else if (doit) {
                          /*
                           * For the current task the MSR is read only when
                           * the live GS selector is non-zero; with a zero
                           * selector the saved thread.gs value is used.
                           */
  823                        savesegment(gs, gsindex);
  824                        if (gsindex)
  825                                rdmsrl(MSR_KERNEL_GS_BASE, base);
  826                        else
  827                                base = task->thread.gs;
  828                }
  829                else
  830                        base = task->thread.gs;
  831                ret = put_user(base, (unsigned long __user *)addr);
  832                break;
  833        }
  834
  835        default:
  836                ret = -EINVAL;
  837                break;
  838        }
  839
  840        return ret;
  841}
 842
  /* Run do_arch_prctl() on the current task with @code and @addr. */
  843long sys_arch_prctl(int code, unsigned long addr)
  844{
  845        return do_arch_prctl(current, code, addr);
  846}
 847
 848unsigned long arch_align_stack(unsigned long sp)
 849{
 850        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 851                sp -= get_random_int() % 8192;
 852        return sp & ~0xf;
 853}
 854
 855unsigned long arch_randomize_brk(struct mm_struct *mm)
 856{
 857        unsigned long range_end = mm->brk + 0x02000000;
 858        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 859}
 860
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.