linux/arch/x86/kernel/process_64.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1995  Linus Torvalds
   3 *
   4 *  Pentium III FXSR, SSE support
   5 *      Gareth Hughes <gareth@valinux.com>, May 2000
   6 *
   7 *  X86-64 port
   8 *      Andi Kleen.
   9 *
  10 *      CPU hotplug support - ashok.raj@intel.com
  11 */
  12
  13/*
   14 * This file handles the architecture-dependent parts of process handling.
  15 */
  16
  17#include <stdarg.h>
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/fs.h>
  23#include <linux/kernel.h>
  24#include <linux/mm.h>
  25#include <linux/elfcore.h>
  26#include <linux/smp.h>
  27#include <linux/slab.h>
  28#include <linux/user.h>
  29#include <linux/interrupt.h>
  30#include <linux/utsname.h>
  31#include <linux/delay.h>
  32#include <linux/module.h>
  33#include <linux/ptrace.h>
  34#include <linux/random.h>
  35#include <linux/notifier.h>
  36#include <linux/kprobes.h>
  37#include <linux/kdebug.h>
  38#include <linux/tick.h>
  39#include <linux/prctl.h>
  40
  41#include <asm/uaccess.h>
  42#include <asm/pgtable.h>
  43#include <asm/system.h>
  44#include <asm/io.h>
  45#include <asm/processor.h>
  46#include <asm/i387.h>
  47#include <asm/mmu_context.h>
  48#include <asm/pda.h>
  49#include <asm/prctl.h>
  50#include <asm/desc.h>
  51#include <asm/proto.h>
  52#include <asm/ia32.h>
  53#include <asm/idle.h>
  54
/* Declared here only; the definition is not in this file. */
asmlinkage extern void ret_from_fork(void);

/* Initialised to CLONE_VM | CLONE_UNTRACED; not referenced elsewhere in this file. */
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

/* Exported to modules; starts out as 0 and is not modified in this file. */
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 * cpu_idle() falls back to default_idle() while this pointer is NULL.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

/* Notifier chain invoked with IDLE_START / IDLE_END by enter_idle() / __exit_idle(). */
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  69
  70void idle_notifier_register(struct notifier_block *n)
  71{
  72        atomic_notifier_chain_register(&idle_notifier, n);
  73}
  74
  75void enter_idle(void)
  76{
  77        write_pda(isidle, 1);
  78        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
  79}
  80
  81static void __exit_idle(void)
  82{
  83        if (test_and_clear_bit_pda(0, isidle) == 0)
  84                return;
  85        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
  86}
  87
  88/* Called from interrupts to signify idle end */
  89void exit_idle(void)
  90{
  91        /* idle loop has pid 0 */
  92        if (current->pid)
  93                return;
  94        __exit_idle();
  95}
  96
  97/*
  98 * We use this if we don't have any better
  99 * idle routine..
 100 */
 101void default_idle(void)
 102{
 103        current_thread_info()->status &= ~TS_POLLING;
 104        /*
 105         * TS_POLLING-cleared state must be visible before we
 106         * test NEED_RESCHED:
 107         */
 108        smp_mb();
 109        if (!need_resched())
 110                safe_halt();    /* enables interrupts racelessly */
 111        else
 112                local_irq_enable();
 113        current_thread_info()->status |= TS_POLLING;
 114}
 115
 116#ifdef CONFIG_HOTPLUG_CPU
 117DECLARE_PER_CPU(int, cpu_state);
 118
 119#include <asm/nmi.h>
 120/* We halt the CPU with physical CPU hotplug */
 121static inline void play_dead(void)
 122{
 123        idle_task_exit();
 124        wbinvd();
 125        mb();
 126        /* Ack it */
 127        __get_cpu_var(cpu_state) = CPU_DEAD;
 128
 129        local_irq_disable();
 130        while (1)
 131                halt();
 132}
 133#else
 134static inline void play_dead(void)
 135{
 136        BUG();
 137}
 138#endif /* CONFIG_HOTPLUG_CPU */
 139
 140/*
 141 * The idle thread. There's no useful work to be
 142 * done, so just try to conserve power and have a
 143 * low exit latency (ie sit in a loop waiting for
 144 * somebody to say that they'd like to reschedule)
 145 */
 146void cpu_idle(void)
 147{
 148        current_thread_info()->status |= TS_POLLING;
 149        /* endless idle loop with no priority at all */
 150        while (1) {
 151                tick_nohz_stop_sched_tick();
 152                while (!need_resched()) {
 153                        void (*idle)(void);
 154
 155                        rmb();
 156                        idle = pm_idle;
 157                        if (!idle)
 158                                idle = default_idle;
 159                        if (cpu_is_offline(smp_processor_id()))
 160                                play_dead();
 161                        /*
 162                         * Idle routines should keep interrupts disabled
 163                         * from here on, until they go to idle.
 164                         * Otherwise, idle callbacks can misfire.
 165                         */
 166                        local_irq_disable();
 167                        enter_idle();
 168                        idle();
 169                        /* In many cases the interrupt that ended idle
 170                           has already called exit_idle. But some idle
 171                           loops can be woken up without interrupt. */
 172                        __exit_idle();
 173                }
 174
 175                tick_nohz_restart_sched_tick();
 176                preempt_enable_no_resched();
 177                schedule();
 178                preempt_disable();
 179        }
 180}
 181
 182/* Prints also some state that isn't saved in the pt_regs */
 183void __show_regs(struct pt_regs * regs)
 184{
 185        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 186        unsigned long d0, d1, d2, d3, d6, d7;
 187        unsigned int fsindex, gsindex;
 188        unsigned int ds, cs, es;
 189
 190        printk("\n");
 191        print_modules();
 192        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 193                current->pid, current->comm, print_tainted(),
 194                init_utsname()->release,
 195                (int)strcspn(init_utsname()->version, " "),
 196                init_utsname()->version);
 197        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 198        printk_address(regs->ip, 1);
 199        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
 200                regs->flags);
 201        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 202               regs->ax, regs->bx, regs->cx);
 203        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
 204               regs->dx, regs->si, regs->di);
 205        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
 206               regs->bp, regs->r8, regs->r9);
 207        printk("R10: %016lx R11: %016lx R12: %016lx\n",
 208               regs->r10, regs->r11, regs->r12); 
 209        printk("R13: %016lx R14: %016lx R15: %016lx\n",
 210               regs->r13, regs->r14, regs->r15); 
 211
 212        asm("movl %%ds,%0" : "=r" (ds)); 
 213        asm("movl %%cs,%0" : "=r" (cs)); 
 214        asm("movl %%es,%0" : "=r" (es)); 
 215        asm("movl %%fs,%0" : "=r" (fsindex));
 216        asm("movl %%gs,%0" : "=r" (gsindex));
 217
 218        rdmsrl(MSR_FS_BASE, fs);
 219        rdmsrl(MSR_GS_BASE, gs); 
 220        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
 221
 222        cr0 = read_cr0();
 223        cr2 = read_cr2();
 224        cr3 = read_cr3();
 225        cr4 = read_cr4();
 226
 227        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
 228               fs,fsindex,gs,gsindex,shadowgs); 
 229        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
 230        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
 231
 232        get_debugreg(d0, 0);
 233        get_debugreg(d1, 1);
 234        get_debugreg(d2, 2);
 235        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 236        get_debugreg(d3, 3);
 237        get_debugreg(d6, 6);
 238        get_debugreg(d7, 7);
 239        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 240}
 241
 242void show_regs(struct pt_regs *regs)
 243{
 244        printk("CPU %d:", smp_processor_id());
 245        __show_regs(regs);
 246        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 247}
 248
 249/*
 250 * Free current thread data structures etc..
 251 */
 252void exit_thread(void)
 253{
 254        struct task_struct *me = current;
 255        struct thread_struct *t = &me->thread;
 256
 257        if (me->thread.io_bitmap_ptr) {
 258                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 259
 260                kfree(t->io_bitmap_ptr);
 261                t->io_bitmap_ptr = NULL;
 262                clear_thread_flag(TIF_IO_BITMAP);
 263                /*
 264                 * Careful, clear this in the TSS too:
 265                 */
 266                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
 267                t->io_bitmap_max = 0;
 268                put_cpu();
 269        }
 270}
 271
 272void flush_thread(void)
 273{
 274        struct task_struct *tsk = current;
 275
 276        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
 277                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
 278                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
 279                        clear_tsk_thread_flag(tsk, TIF_IA32);
 280                } else {
 281                        set_tsk_thread_flag(tsk, TIF_IA32);
 282                        current_thread_info()->status |= TS_COMPAT;
 283                }
 284        }
 285        clear_tsk_thread_flag(tsk, TIF_DEBUG);
 286
 287        tsk->thread.debugreg0 = 0;
 288        tsk->thread.debugreg1 = 0;
 289        tsk->thread.debugreg2 = 0;
 290        tsk->thread.debugreg3 = 0;
 291        tsk->thread.debugreg6 = 0;
 292        tsk->thread.debugreg7 = 0;
 293        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 294        /*
 295         * Forget coprocessor state..
 296         */
 297        tsk->fpu_counter = 0;
 298        clear_fpu(tsk);
 299        clear_used_math();
 300}
 301
 302void release_thread(struct task_struct *dead_task)
 303{
 304        if (dead_task->mm) {
 305                if (dead_task->mm->context.size) {
 306                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
 307                                        dead_task->comm,
 308                                        dead_task->mm->context.ldt,
 309                                        dead_task->mm->context.size);
 310                        BUG();
 311                }
 312        }
 313}
 314
 315static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 316{
 317        struct user_desc ud = {
 318                .base_addr = addr,
 319                .limit = 0xfffff,
 320                .seg_32bit = 1,
 321                .limit_in_pages = 1,
 322                .useable = 1,
 323        };
 324        struct desc_struct *desc = t->thread.tls_array;
 325        desc += tls;
 326        fill_ldt(desc, &ud);
 327}
 328
 329static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 330{
 331        return get_desc_base(&t->thread.tls_array[tls]);
 332}
 333
 334/*
 335 * This gets called before we allocate a new thread and copy
 336 * the current task into it.
 337 */
 338void prepare_to_copy(struct task_struct *tsk)
 339{
 340        unlazy_fpu(tsk);
 341}
 342
 343int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 344                unsigned long unused,
 345        struct task_struct * p, struct pt_regs * regs)
 346{
 347        int err;
 348        struct pt_regs * childregs;
 349        struct task_struct *me = current;
 350
 351        childregs = ((struct pt_regs *)
 352                        (THREAD_SIZE + task_stack_page(p))) - 1;
 353        *childregs = *regs;
 354
 355        childregs->ax = 0;
 356        childregs->sp = sp;
 357        if (sp == ~0UL)
 358                childregs->sp = (unsigned long)childregs;
 359
 360        p->thread.sp = (unsigned long) childregs;
 361        p->thread.sp0 = (unsigned long) (childregs+1);
 362        p->thread.usersp = me->thread.usersp;
 363
 364        set_tsk_thread_flag(p, TIF_FORK);
 365
 366        p->thread.fs = me->thread.fs;
 367        p->thread.gs = me->thread.gs;
 368
 369        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
 370        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
 371        asm("mov %%es,%0" : "=m" (p->thread.es));
 372        asm("mov %%ds,%0" : "=m" (p->thread.ds));
 373
 374        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 375                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 376                if (!p->thread.io_bitmap_ptr) {
 377                        p->thread.io_bitmap_max = 0;
 378                        return -ENOMEM;
 379                }
 380                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 381                                IO_BITMAP_BYTES);
 382                set_tsk_thread_flag(p, TIF_IO_BITMAP);
 383        }
 384
 385        /*
 386         * Set a new TLS for the child thread?
 387         */
 388        if (clone_flags & CLONE_SETTLS) {
 389#ifdef CONFIG_IA32_EMULATION
 390                if (test_thread_flag(TIF_IA32))
 391                        err = do_set_thread_area(p, -1,
 392                                (struct user_desc __user *)childregs->si, 0);
 393                else                    
 394#endif   
 395                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
 396                if (err) 
 397                        goto out;
 398        }
 399        err = 0;
 400out:
 401        if (err && p->thread.io_bitmap_ptr) {
 402                kfree(p->thread.io_bitmap_ptr);
 403                p->thread.io_bitmap_max = 0;
 404        }
 405        return err;
 406}
 407
 408void
 409start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 410{
 411        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
 412        load_gs_index(0);
 413        regs->ip                = new_ip;
 414        regs->sp                = new_sp;
 415        write_pda(oldrsp, new_sp);
 416        regs->cs                = __USER_CS;
 417        regs->ss                = __USER_DS;
 418        regs->flags             = 0x200;
 419        set_fs(USER_DS);
 420        /*
 421         * Free the old FP and other extended state
 422         */
 423        free_thread_xstate(current);
 424}
 425EXPORT_SYMBOL_GPL(start_thread);
 426
 427static void hard_disable_TSC(void)
 428{
 429        write_cr4(read_cr4() | X86_CR4_TSD);
 430}
 431
 432void disable_TSC(void)
 433{
 434        preempt_disable();
 435        if (!test_and_set_thread_flag(TIF_NOTSC))
 436                /*
 437                 * Must flip the CPU state synchronously with
 438                 * TIF_NOTSC in the current running context.
 439                 */
 440                hard_disable_TSC();
 441        preempt_enable();
 442}
 443
 444static void hard_enable_TSC(void)
 445{
 446        write_cr4(read_cr4() & ~X86_CR4_TSD);
 447}
 448
 449static void enable_TSC(void)
 450{
 451        preempt_disable();
 452        if (test_and_clear_thread_flag(TIF_NOTSC))
 453                /*
 454                 * Must flip the CPU state synchronously with
 455                 * TIF_NOTSC in the current running context.
 456                 */
 457                hard_enable_TSC();
 458        preempt_enable();
 459}
 460
 461int get_tsc_mode(unsigned long adr)
 462{
 463        unsigned int val;
 464
 465        if (test_thread_flag(TIF_NOTSC))
 466                val = PR_TSC_SIGSEGV;
 467        else
 468                val = PR_TSC_ENABLE;
 469
 470        return put_user(val, (unsigned int __user *)adr);
 471}
 472
 473int set_tsc_mode(unsigned int val)
 474{
 475        if (val == PR_TSC_SIGSEGV)
 476                disable_TSC();
 477        else if (val == PR_TSC_ENABLE)
 478                enable_TSC();
 479        else
 480                return -EINVAL;
 481
 482        return 0;
 483}
 484
 485/*
 486 * This special macro can be used to load a debugging register
 487 */
 488#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 489
 490static inline void __switch_to_xtra(struct task_struct *prev_p,
 491                                    struct task_struct *next_p,
 492                                    struct tss_struct *tss)
 493{
 494        struct thread_struct *prev, *next;
 495        unsigned long debugctl;
 496
 497        prev = &prev_p->thread,
 498        next = &next_p->thread;
 499
 500        debugctl = prev->debugctlmsr;
 501        if (next->ds_area_msr != prev->ds_area_msr) {
 502                /* we clear debugctl to make sure DS
 503                 * is not in use when we change it */
 504                debugctl = 0;
 505                update_debugctlmsr(0);
 506                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
 507        }
 508
 509        if (next->debugctlmsr != debugctl)
 510                update_debugctlmsr(next->debugctlmsr);
 511
 512        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 513                loaddebug(next, 0);
 514                loaddebug(next, 1);
 515                loaddebug(next, 2);
 516                loaddebug(next, 3);
 517                /* no 4 and 5 */
 518                loaddebug(next, 6);
 519                loaddebug(next, 7);
 520        }
 521
 522        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 523            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 524                /* prev and next are different */
 525                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
 526                        hard_disable_TSC();
 527                else
 528                        hard_enable_TSC();
 529        }
 530
 531        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 532                /*
 533                 * Copy the relevant range of the IO bitmap.
 534                 * Normally this is 128 bytes or less:
 535                 */
 536                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 537                       max(prev->io_bitmap_max, next->io_bitmap_max));
 538        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
 539                /*
 540                 * Clear any possible leftover bits:
 541                 */
 542                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 543        }
 544
 545#ifdef X86_BTS
 546        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 547                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 548
 549        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 550                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
 551#endif
 552}
 553
 554/*
 555 *      switch_to(x,y) should switch tasks from x to y.
 556 *
 557 * This could still be optimized:
 558 * - fold all the options into a flag word and test it with a single test.
 559 * - could test fs/gs bitsliced
 560 *
 561 * Kprobes not supported here. Set the probe on schedule instead.
 562 */
 563struct task_struct *
 564__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 565{
 566        struct thread_struct *prev = &prev_p->thread,
 567                                 *next = &next_p->thread;
 568        int cpu = smp_processor_id();
 569        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 570
 571        /* we're going to use this soon, after a few expensive things */
 572        if (next_p->fpu_counter>5)
 573                prefetch(next->xstate);
 574
 575        /*
 576         * Reload esp0, LDT and the page table pointer:
 577         */
 578        load_sp0(tss, next);
 579
 580        /* 
 581         * Switch DS and ES.
 582         * This won't pick up thread selector changes, but I guess that is ok.
 583         */
 584        asm volatile("mov %%es,%0" : "=m" (prev->es));
 585        if (unlikely(next->es | prev->es))
 586                loadsegment(es, next->es); 
 587        
 588        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
 589        if (unlikely(next->ds | prev->ds))
 590                loadsegment(ds, next->ds);
 591
 592        load_TLS(next, cpu);
 593
 594        /* 
 595         * Switch FS and GS.
 596         */
 597        { 
 598                unsigned fsindex;
 599                asm volatile("movl %%fs,%0" : "=r" (fsindex)); 
 600                /* segment register != 0 always requires a reload. 
 601                   also reload when it has changed. 
 602                   when prev process used 64bit base always reload
 603                   to avoid an information leak. */
 604                if (unlikely(fsindex | next->fsindex | prev->fs)) {
 605                        loadsegment(fs, next->fsindex);
 606                        /* check if the user used a selector != 0
 607                         * if yes clear 64bit base, since overloaded base
 608                         * is always mapped to the Null selector
 609                         */
 610                        if (fsindex)
 611                        prev->fs = 0;                           
 612                }
 613                /* when next process has a 64bit base use it */
 614                if (next->fs) 
 615                        wrmsrl(MSR_FS_BASE, next->fs); 
 616                prev->fsindex = fsindex;
 617        }
 618        { 
 619                unsigned gsindex;
 620                asm volatile("movl %%gs,%0" : "=r" (gsindex)); 
 621                if (unlikely(gsindex | next->gsindex | prev->gs)) {
 622                        load_gs_index(next->gsindex);
 623                        if (gsindex)
 624                        prev->gs = 0;                           
 625                }
 626                if (next->gs)
 627                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
 628                prev->gsindex = gsindex;
 629        }
 630
 631        /* Must be after DS reload */
 632        unlazy_fpu(prev_p);
 633
 634        /* 
 635         * Switch the PDA and FPU contexts.
 636         */
 637        prev->usersp = read_pda(oldrsp);
 638        write_pda(oldrsp, next->usersp);
 639        write_pda(pcurrent, next_p); 
 640
 641        write_pda(kernelstack,
 642        (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
 643#ifdef CONFIG_CC_STACKPROTECTOR
 644        write_pda(stack_canary, next_p->stack_canary);
 645        /*
 646         * Build time only check to make sure the stack_canary is at
 647         * offset 40 in the pda; this is a gcc ABI requirement
 648         */
 649        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
 650#endif
 651
 652        /*
 653         * Now maybe reload the debug registers and handle I/O bitmaps
 654         */
 655        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
 656                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 657                __switch_to_xtra(prev_p, next_p, tss);
 658
 659        /* If the task has used fpu the last 5 timeslices, just do a full
 660         * restore of the math state immediately to avoid the trap; the
 661         * chances of needing FPU soon are obviously high now
 662         *
 663         * tsk_used_math() checks prevent calling math_state_restore(),
 664         * which can sleep in the case of !tsk_used_math()
 665         */
 666        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
 667                math_state_restore();
 668        return prev_p;
 669}
 670
 671/*
 672 * sys_execve() executes a new program.
 673 */
 674asmlinkage
 675long sys_execve(char __user *name, char __user * __user *argv,
 676                char __user * __user *envp, struct pt_regs *regs)
 677{
 678        long error;
 679        char * filename;
 680
 681        filename = getname(name);
 682        error = PTR_ERR(filename);
 683        if (IS_ERR(filename))
 684                return error;
 685        error = do_execve(filename, argv, envp, regs);
 686        putname(filename);
 687        return error;
 688}
 689
 690void set_personality_64bit(void)
 691{
 692        /* inherit personality from parent */
 693
 694        /* Make sure to be in 64bit mode */
 695        clear_thread_flag(TIF_IA32);
 696
 697        /* TBD: overwrites user setup. Should have two bits.
 698           But 64bit processes have always behaved this way,
 699           so it's not too bad. The main problem is just that
 700           32bit childs are affected again. */
 701        current->personality &= ~READ_IMPLIES_EXEC;
 702}
 703
 704asmlinkage long sys_fork(struct pt_regs *regs)
 705{
 706        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 707}
 708
 709asmlinkage long
 710sys_clone(unsigned long clone_flags, unsigned long newsp,
 711          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 712{
 713        if (!newsp)
 714                newsp = regs->sp;
 715        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 716}
 717
 718/*
 719 * This is trivial, and on the face of it looks like it
 720 * could equally well be done in user mode.
 721 *
 722 * Not so, for quite unobvious reasons - register pressure.
 723 * In user mode vfork() cannot have a stack frame, and if
 724 * done by calling the "clone()" system call directly, you
 725 * do not have enough call-clobbered registers to hold all
 726 * the information you need.
 727 */
 728asmlinkage long sys_vfork(struct pt_regs *regs)
 729{
 730        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 731                    NULL, NULL);
 732}
 733
 734unsigned long get_wchan(struct task_struct *p)
 735{
 736        unsigned long stack;
 737        u64 fp,ip;
 738        int count = 0;
 739
 740        if (!p || p == current || p->state==TASK_RUNNING)
 741                return 0; 
 742        stack = (unsigned long)task_stack_page(p);
 743        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 744                return 0;
 745        fp = *(u64 *)(p->thread.sp);
 746        do { 
 747                if (fp < (unsigned long)stack ||
 748                    fp > (unsigned long)stack+THREAD_SIZE)
 749                        return 0; 
 750                ip = *(u64 *)(fp+8);
 751                if (!in_sched_functions(ip))
 752                        return ip;
 753                fp = *(u64 *)fp; 
 754        } while (count++ < 16); 
 755        return 0;
 756}
 757
 758long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 759{ 
 760        int ret = 0; 
 761        int doit = task == current;
 762        int cpu;
 763
 764        switch (code) { 
 765        case ARCH_SET_GS:
 766                if (addr >= TASK_SIZE_OF(task))
 767                        return -EPERM; 
 768                cpu = get_cpu();
 769                /* handle small bases via the GDT because that's faster to 
 770                   switch. */
 771                if (addr <= 0xffffffff) {  
 772                        set_32bit_tls(task, GS_TLS, addr); 
 773                        if (doit) { 
 774                                load_TLS(&task->thread, cpu);
 775                                load_gs_index(GS_TLS_SEL); 
 776                        }
 777                        task->thread.gsindex = GS_TLS_SEL; 
 778                        task->thread.gs = 0;
 779                } else { 
 780                        task->thread.gsindex = 0;
 781                        task->thread.gs = addr;
 782                        if (doit) {
 783                                load_gs_index(0);
 784                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 785                        } 
 786                }
 787                put_cpu();
 788                break;
 789        case ARCH_SET_FS:
 790                /* Not strictly needed for fs, but do it for symmetry
 791                   with gs */
 792                if (addr >= TASK_SIZE_OF(task))
 793                        return -EPERM;
 794                cpu = get_cpu();
 795                /* handle small bases via the GDT because that's faster to
 796                   switch. */
 797                if (addr <= 0xffffffff) {
 798                        set_32bit_tls(task, FS_TLS, addr);
 799                        if (doit) {
 800                                load_TLS(&task->thread, cpu);
 801                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 802                        }
 803                        task->thread.fsindex = FS_TLS_SEL;
 804                        task->thread.fs = 0;
 805                } else {
 806                        task->thread.fsindex = 0;
 807                        task->thread.fs = addr;
 808                        if (doit) {
 809                                /* set the selector to 0 to not confuse
 810                                   __switch_to */
 811                                asm volatile("movl %0,%%fs" :: "r" (0));
 812                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
 813                        }
 814                }
 815                put_cpu();
 816                break;
 817        case ARCH_GET_FS: {
 818                unsigned long base;
 819                if (task->thread.fsindex == FS_TLS_SEL)
 820                        base = read_32bit_tls(task, FS_TLS);
 821                else if (doit)
 822                        rdmsrl(MSR_FS_BASE, base);
 823                else
 824                        base = task->thread.fs;
 825                ret = put_user(base, (unsigned long __user *)addr);
 826                break;
 827        }
 828        case ARCH_GET_GS: {
 829                unsigned long base;
 830                unsigned gsindex;
 831                if (task->thread.gsindex == GS_TLS_SEL)
 832                        base = read_32bit_tls(task, GS_TLS);
 833                else if (doit) {
 834                        asm("movl %%gs,%0" : "=r" (gsindex));
 835                        if (gsindex)
 836                                rdmsrl(MSR_KERNEL_GS_BASE, base);
 837                        else
 838                                base = task->thread.gs;
 839                }
 840                else
 841                        base = task->thread.gs;
 842                ret = put_user(base, (unsigned long __user *)addr);
 843                break;
 844        }
 845
 846        default:
 847                ret = -EINVAL;
 848                break;
 849        }
 850
 851        return ret;
 852}
 853
 854long sys_arch_prctl(int code, unsigned long addr)
 855{
 856        return do_arch_prctl(current, code, addr);
 857}
 858
 859unsigned long arch_align_stack(unsigned long sp)
 860{
 861        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 862                sp -= get_random_int() % 8192;
 863        return sp & ~0xf;
 864}
 865
 866unsigned long arch_randomize_brk(struct mm_struct *mm)
 867{
 868        unsigned long range_end = mm->brk + 0x02000000;
 869        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 870}
 871
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.