linux-bk/kernel/fork.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/fork.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 *  'fork.c' contains the help-routines for the 'fork' system call
   9 * (see also entry.S and others).
  10 * Fork is rather simple, once you get the hang of it, but the memory
  11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
  12 */
  13
  14#include <linux/config.h>
  15#include <linux/slab.h>
  16#include <linux/init.h>
  17#include <linux/unistd.h>
  18#include <linux/smp_lock.h>
  19#include <linux/module.h>
  20#include <linux/vmalloc.h>
  21#include <linux/completion.h>
  22#include <linux/namespace.h>
  23#include <linux/personality.h>
  24#include <linux/file.h>
  25#include <linux/binfmts.h>
  26#include <linux/mman.h>
  27#include <linux/fs.h>
  28#include <linux/security.h>
  29#include <linux/futex.h>
  30#include <linux/ptrace.h>
  31
  32#include <asm/pgtable.h>
  33#include <asm/pgalloc.h>
  34#include <asm/uaccess.h>
  35#include <asm/mmu_context.h>
  36#include <asm/cacheflush.h>
  37#include <asm/tlbflush.h>
  38
  39static kmem_cache_t *task_struct_cachep;
  40
  41extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
  42extern void exit_semundo(struct task_struct *tsk);
  43
  44/* The idle threads do not count.. */
  45int nr_threads;
  46
  47int max_threads;
  48unsigned long total_forks;      /* Handle normal Linux uptimes. */
  49
  50rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;  /* outer */
  51
  52/*
  53 * A per-CPU task cache - this relies on the fact that
  54 * the very last portion of sys_exit() is executed with
  55 * preemption turned off.
  56 */
  57static task_t *task_cache[NR_CPUS] __cacheline_aligned;
  58
  59void __put_task_struct(struct task_struct *tsk)
  60{
  61        if (tsk != current) {
  62                free_thread_info(tsk->thread_info);
  63                kmem_cache_free(task_struct_cachep,tsk);
  64        } else {
  65                int cpu = smp_processor_id();
  66
  67                tsk = xchg(task_cache + cpu, tsk);
  68                if (tsk) {
  69                        free_thread_info(tsk->thread_info);
  70                        kmem_cache_free(task_struct_cachep,tsk);
  71                }
  72        }
  73}
  74
  75void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
  76{
  77        unsigned long flags;
  78
  79        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
  80        spin_lock_irqsave(&q->lock, flags);
  81        __add_wait_queue(q, wait);
  82        spin_unlock_irqrestore(&q->lock, flags);
  83}
  84
  85void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
  86{
  87        unsigned long flags;
  88
  89        wait->flags |= WQ_FLAG_EXCLUSIVE;
  90        spin_lock_irqsave(&q->lock, flags);
  91        __add_wait_queue_tail(q, wait);
  92        spin_unlock_irqrestore(&q->lock, flags);
  93}
  94
  95void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
  96{
  97        unsigned long flags;
  98
  99        spin_lock_irqsave(&q->lock, flags);
 100        __remove_wait_queue(q, wait);
 101        spin_unlock_irqrestore(&q->lock, flags);
 102}
 103
 104void __init fork_init(unsigned long mempages)
 105{
 106        /* create a slab on which task_structs can be allocated */
 107        task_struct_cachep =
 108                kmem_cache_create("task_struct",
 109                                  sizeof(struct task_struct),0,
 110                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
 111        if (!task_struct_cachep)
 112                panic("fork_init(): cannot create task_struct SLAB cache");
 113
 114        /*
 115         * The default maximum number of threads is set to a safe
 116         * value: the thread structures can take up at most half
 117         * of memory.
 118         */
 119        max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
 120
 121        init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
 122        init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 123}
 124
 125static struct task_struct *dup_task_struct(struct task_struct *orig)
 126{
 127        struct task_struct *tsk;
 128        struct thread_info *ti;
 129
 130        tsk = xchg(task_cache + smp_processor_id(), NULL);
 131        if (!tsk) {
 132                ti = alloc_thread_info();
 133                if (!ti)
 134                        return NULL;
 135
 136                tsk = kmem_cache_alloc(task_struct_cachep, GFP_KERNEL);
 137                if (!tsk) {
 138                        free_thread_info(ti);
 139                        return NULL;
 140                }
 141        } else
 142                ti = tsk->thread_info;
 143
 144        *ti = *orig->thread_info;
 145        *tsk = *orig;
 146        tsk->thread_info = ti;
 147        ti->task = tsk;
 148        atomic_set(&tsk->usage,1);
 149        return tsk;
 150}
 151
 152static inline int dup_mmap(struct mm_struct * mm)
 153{
 154        struct vm_area_struct * mpnt, *tmp, **pprev;
 155        int retval;
 156        unsigned long charge = 0;
 157
 158        flush_cache_mm(current->mm);
 159        mm->locked_vm = 0;
 160        mm->mmap = NULL;
 161        mm->mmap_cache = NULL;
 162        mm->map_count = 0;
 163        mm->rss = 0;
 164        mm->cpu_vm_mask = 0;
 165        pprev = &mm->mmap;
 166
 167        /*
 168         * Add it to the mmlist after the parent.
 169         * Doing it this way means that we can order the list,
 170         * and fork() won't mess up the ordering significantly.
 171         * Add it first so that swapoff can see any swap entries.
 172         */
 173        spin_lock(&mmlist_lock);
 174        list_add(&mm->mmlist, &current->mm->mmlist);
 175        mmlist_nr++;
 176        spin_unlock(&mmlist_lock);
 177
 178        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
 179                struct file *file;
 180
 181                retval = -ENOMEM;
 182                if(mpnt->vm_flags & VM_DONTCOPY)
 183                        continue;
 184                if (mpnt->vm_flags & VM_ACCOUNT) {
 185                        unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 186                        if (!vm_enough_memory(len))
 187                                goto fail_nomem;
 188                        charge += len;
 189                }
 190                tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 191                if (!tmp)
 192                        goto fail_nomem;
 193                *tmp = *mpnt;
 194                tmp->vm_flags &= ~VM_LOCKED;
 195                tmp->vm_mm = mm;
 196                tmp->vm_next = NULL;
 197                file = tmp->vm_file;
 198                if (file) {
 199                        struct inode *inode = file->f_dentry->d_inode;
 200                        get_file(file);
 201                        if (tmp->vm_flags & VM_DENYWRITE)
 202                                atomic_dec(&inode->i_writecount);
 203      
 204                        /* insert tmp into the share list, just after mpnt */
 205                        spin_lock(&inode->i_mapping->i_shared_lock);
 206                        list_add_tail(&tmp->shared, &mpnt->shared);
 207                        spin_unlock(&inode->i_mapping->i_shared_lock);
 208                }
 209
 210                /*
 211                 * Link in the new vma and copy the page table entries:
 212                 * link in first so that swapoff can see swap entries.
 213                 */
 214                spin_lock(&mm->page_table_lock);
 215                *pprev = tmp;
 216                pprev = &tmp->vm_next;
 217                mm->map_count++;
 218                retval = copy_page_range(mm, current->mm, tmp);
 219                spin_unlock(&mm->page_table_lock);
 220
 221                if (tmp->vm_ops && tmp->vm_ops->open)
 222                        tmp->vm_ops->open(tmp);
 223
 224                if (retval)
 225                        goto fail_nomem;
 226        }
 227        retval = 0;
 228        build_mmap_rb(mm);
 229
 230out:
 231        flush_tlb_mm(current->mm);
 232        return retval;
 233fail_nomem:
 234        vm_unacct_memory(charge);
 235        goto out;
 236}
 237
 238spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 239int mmlist_nr;
 240
 241#define allocate_mm()   (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 242#define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))
 243
 244#include <linux/init_task.h>
 245
 246static struct mm_struct * mm_init(struct mm_struct * mm)
 247{
 248        atomic_set(&mm->mm_users, 1);
 249        atomic_set(&mm->mm_count, 1);
 250        init_rwsem(&mm->mmap_sem);
 251        mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 252        mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
 253        mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 254        mm->pgd = pgd_alloc(mm);
 255        if (mm->pgd)
 256                return mm;
 257        free_mm(mm);
 258        return NULL;
 259}
 260        
 261
 262/*
 263 * Allocate and initialize an mm_struct.
 264 */
 265struct mm_struct * mm_alloc(void)
 266{
 267        struct mm_struct * mm;
 268
 269        mm = allocate_mm();
 270        if (mm) {
 271                memset(mm, 0, sizeof(*mm));
 272                return mm_init(mm);
 273        }
 274        return NULL;
 275}
 276
 277/*
 278 * Called when the last reference to the mm
 279 * is dropped: either by a lazy thread or by
 280 * mmput. Free the page directory and the mm.
 281 */
 282inline void __mmdrop(struct mm_struct *mm)
 283{
 284        if (mm == &init_mm) BUG();
 285        pgd_free(mm->pgd);
 286        destroy_context(mm);
 287        free_mm(mm);
 288}
 289
 290/*
 291 * Decrement the use count and release all resources for an mm.
 292 */
 293void mmput(struct mm_struct *mm)
 294{
 295        if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
 296                list_del(&mm->mmlist);
 297                mmlist_nr--;
 298                spin_unlock(&mmlist_lock);
 299                exit_mmap(mm);
 300                mmdrop(mm);
 301        }
 302}
 303
 304/* Please note the differences between mmput and mm_release.
 305 * mmput is called whenever we stop holding onto a mm_struct,
 306 * error success whatever.
 307 *
 308 * mm_release is called after a mm_struct has been removed
 309 * from the current process.
 310 *
 311 * This difference is important for error handling, when we
 312 * only half set up a mm_struct for a new process and need to restore
 313 * the old one.  Because we mmput the new mm_struct before
 314 * restoring the old one. . .
 315 * Eric Biederman 10 January 1998
 316 */
 317void mm_release(void)
 318{
 319        struct task_struct *tsk = current;
 320        struct completion *vfork_done = tsk->vfork_done;
 321
 322        /* notify parent sleeping on vfork() */
 323        if (vfork_done) {
 324                tsk->vfork_done = NULL;
 325                complete(vfork_done);
 326        }
 327        if (tsk->user_tid) {
 328                /*
 329                 * We dont check the error code - if userspace has
 330                 * not set up a proper pointer then tough luck.
 331                 */
 332                put_user(0, tsk->user_tid);
 333                sys_futex(tsk->user_tid, FUTEX_WAKE, 1, NULL);
 334        }
 335}
 336
 337static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 338{
 339        struct mm_struct * mm, *oldmm;
 340        int retval;
 341
 342        tsk->min_flt = tsk->maj_flt = 0;
 343        tsk->cmin_flt = tsk->cmaj_flt = 0;
 344        tsk->nswap = tsk->cnswap = 0;
 345
 346        tsk->mm = NULL;
 347        tsk->active_mm = NULL;
 348
 349        /*
 350         * Are we cloning a kernel thread?
 351         *
 352         * We need to steal a active VM for that..
 353         */
 354        oldmm = current->mm;
 355        if (!oldmm)
 356                return 0;
 357
 358        if (clone_flags & CLONE_VM) {
 359                atomic_inc(&oldmm->mm_users);
 360                mm = oldmm;
 361                /*
 362                 * There are cases where the PTL is held to ensure no
 363                 * new threads start up in user mode using an mm, which
 364                 * allows optimizing out ipis; the tlb_gather_mmu code
 365                 * is an example.
 366                 */
 367                spin_unlock_wait(&oldmm->page_table_lock);
 368                goto good_mm;
 369        }
 370
 371        retval = -ENOMEM;
 372        mm = allocate_mm();
 373        if (!mm)
 374                goto fail_nomem;
 375
 376        /* Copy the current MM stuff.. */
 377        memcpy(mm, oldmm, sizeof(*mm));
 378        if (!mm_init(mm))
 379                goto fail_nomem;
 380
 381        if (init_new_context(tsk,mm))
 382                goto free_pt;
 383
 384        down_write(&oldmm->mmap_sem);
 385        retval = dup_mmap(mm);
 386        up_write(&oldmm->mmap_sem);
 387
 388        if (retval)
 389                goto free_pt;
 390
 391good_mm:
 392        tsk->mm = mm;
 393        tsk->active_mm = mm;
 394        return 0;
 395
 396free_pt:
 397        mmput(mm);
 398fail_nomem:
 399        return retval;
 400}
 401
 402static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 403{
 404        struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 405        /* We don't need to lock fs - think why ;-) */
 406        if (fs) {
 407                atomic_set(&fs->count, 1);
 408                fs->lock = RW_LOCK_UNLOCKED;
 409                fs->umask = old->umask;
 410                read_lock(&old->lock);
 411                fs->rootmnt = mntget(old->rootmnt);
 412                fs->root = dget(old->root);
 413                fs->pwdmnt = mntget(old->pwdmnt);
 414                fs->pwd = dget(old->pwd);
 415                if (old->altroot) {
 416                        fs->altrootmnt = mntget(old->altrootmnt);
 417                        fs->altroot = dget(old->altroot);
 418                } else {
 419                        fs->altrootmnt = NULL;
 420                        fs->altroot = NULL;
 421                }       
 422                read_unlock(&old->lock);
 423        }
 424        return fs;
 425}
 426
 427struct fs_struct *copy_fs_struct(struct fs_struct *old)
 428{
 429        return __copy_fs_struct(old);
 430}
 431
 432static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 433{
 434        if (clone_flags & CLONE_FS) {
 435                atomic_inc(&current->fs->count);
 436                return 0;
 437        }
 438        tsk->fs = __copy_fs_struct(current->fs);
 439        if (!tsk->fs)
 440                return -1;
 441        return 0;
 442}
 443
 444static int count_open_files(struct files_struct *files, int size)
 445{
 446        int i;
 447        
 448        /* Find the last open fd */
 449        for (i = size/(8*sizeof(long)); i > 0; ) {
 450                if (files->open_fds->fds_bits[--i])
 451                        break;
 452        }
 453        i = (i+1) * 8 * sizeof(long);
 454        return i;
 455}
 456
 457static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 458{
 459        struct files_struct *oldf, *newf;
 460        struct file **old_fds, **new_fds;
 461        int open_files, nfds, size, i, error = 0;
 462
 463        /*
 464         * A background process may not have any files ...
 465         */
 466        oldf = current->files;
 467        if (!oldf)
 468                goto out;
 469
 470        if (clone_flags & CLONE_FILES) {
 471                atomic_inc(&oldf->count);
 472                goto out;
 473        }
 474
 475        tsk->files = NULL;
 476        error = -ENOMEM;
 477        newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
 478        if (!newf) 
 479                goto out;
 480
 481        atomic_set(&newf->count, 1);
 482
 483        newf->file_lock     = RW_LOCK_UNLOCKED;
 484        newf->next_fd       = 0;
 485        newf->max_fds       = NR_OPEN_DEFAULT;
 486        newf->max_fdset     = __FD_SETSIZE;
 487        newf->close_on_exec = &newf->close_on_exec_init;
 488        newf->open_fds      = &newf->open_fds_init;
 489        newf->fd            = &newf->fd_array[0];
 490
 491        /* We don't yet have the oldf readlock, but even if the old
 492           fdset gets grown now, we'll only copy up to "size" fds */
 493        size = oldf->max_fdset;
 494        if (size > __FD_SETSIZE) {
 495                newf->max_fdset = 0;
 496                write_lock(&newf->file_lock);
 497                error = expand_fdset(newf, size-1);
 498                write_unlock(&newf->file_lock);
 499                if (error)
 500                        goto out_release;
 501        }
 502        read_lock(&oldf->file_lock);
 503
 504        open_files = count_open_files(oldf, size);
 505
 506        /*
 507         * Check whether we need to allocate a larger fd array.
 508         * Note: we're not a clone task, so the open count won't
 509         * change.
 510         */
 511        nfds = NR_OPEN_DEFAULT;
 512        if (open_files > nfds) {
 513                read_unlock(&oldf->file_lock);
 514                newf->max_fds = 0;
 515                write_lock(&newf->file_lock);
 516                error = expand_fd_array(newf, open_files-1);
 517                write_unlock(&newf->file_lock);
 518                if (error) 
 519                        goto out_release;
 520                nfds = newf->max_fds;
 521                read_lock(&oldf->file_lock);
 522        }
 523
 524        old_fds = oldf->fd;
 525        new_fds = newf->fd;
 526
 527        memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
 528        memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
 529
 530        for (i = open_files; i != 0; i--) {
 531                struct file *f = *old_fds++;
 532                if (f)
 533                        get_file(f);
 534                *new_fds++ = f;
 535        }
 536        read_unlock(&oldf->file_lock);
 537
 538        /* compute the remainder to be cleared */
 539        size = (newf->max_fds - open_files) * sizeof(struct file *);
 540
 541        /* This is long word aligned thus could use a optimized version */ 
 542        memset(new_fds, 0, size); 
 543
 544        if (newf->max_fdset > open_files) {
 545                int left = (newf->max_fdset-open_files)/8;
 546                int start = open_files / (8 * sizeof(unsigned long));
 547                
 548                memset(&newf->open_fds->fds_bits[start], 0, left);
 549                memset(&newf->close_on_exec->fds_bits[start], 0, left);
 550        }
 551
 552        tsk->files = newf;
 553        error = 0;
 554out:
 555        return error;
 556
 557out_release:
 558        free_fdset (newf->close_on_exec, newf->max_fdset);
 559        free_fdset (newf->open_fds, newf->max_fdset);
 560        kmem_cache_free(files_cachep, newf);
 561        goto out;
 562}
 563
 564static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 565{
 566        struct signal_struct *sig;
 567
 568        if (clone_flags & CLONE_SIGHAND) {
 569                atomic_inc(&current->sig->count);
 570                return 0;
 571        }
 572        sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
 573        tsk->sig = sig;
 574        if (!sig)
 575                return -1;
 576        spin_lock_init(&sig->siglock);
 577        atomic_set(&sig->count, 1);
 578        sig->group_exit = 0;
 579        sig->group_exit_code = 0;
 580        sig->group_exit_task = NULL;
 581        memcpy(sig->action, current->sig->action, sizeof(sig->action));
 582        sig->curr_target = NULL;
 583        init_sigpending(&sig->shared_pending);
 584
 585        return 0;
 586}
 587
 588static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 589{
 590        unsigned long new_flags = p->flags;
 591
 592        new_flags &= ~PF_SUPERPRIV;
 593        new_flags |= PF_FORKNOEXEC;
 594        if (!(clone_flags & CLONE_PTRACE))
 595                p->ptrace = 0;
 596        p->flags = new_flags;
 597}
 598
 599/*
 600 * This creates a new process as a copy of the old one,
 601 * but does not actually start it yet.
 602 *
 603 * It copies the registers, and all the appropriate
 604 * parts of the process environment (as per the clone
 605 * flags). The actual kick-off is left to the caller.
 606 */
 607static struct task_struct *copy_process(unsigned long clone_flags,
 608                            unsigned long stack_start,
 609                            struct pt_regs *regs,
 610                            unsigned long stack_size,
 611                            int *user_tid)
 612{
 613        int retval;
 614        struct task_struct *p = NULL;
 615
 616        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 617                return ERR_PTR(-EINVAL);
 618
 619        /*
 620         * Thread groups must share signals as well, and detached threads
 621         * can only be started up within the thread group.
 622         */
 623        if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
 624                return ERR_PTR(-EINVAL);
 625        if ((clone_flags & CLONE_DETACHED) && !(clone_flags & CLONE_THREAD))
 626                return ERR_PTR(-EINVAL);
 627
 628        retval = security_ops->task_create(clone_flags);
 629        if (retval)
 630                goto fork_out;
 631
 632        retval = -ENOMEM;
 633        p = dup_task_struct(current);
 634        if (!p)
 635                goto fork_out;
 636
 637        retval = -EAGAIN;
 638        if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) {
 639                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
 640                        goto bad_fork_free;
 641        }
 642
 643        atomic_inc(&p->user->__count);
 644        atomic_inc(&p->user->processes);
 645
 646        /*
 647         * Counter increases are protected by
 648         * the kernel lock so nr_threads can't
 649         * increase under us (but it may decrease).
 650         */
 651        if (nr_threads >= max_threads)
 652                goto bad_fork_cleanup_count;
 653        
 654        get_exec_domain(p->thread_info->exec_domain);
 655
 656        if (p->binfmt && p->binfmt->module)
 657                __MOD_INC_USE_COUNT(p->binfmt->module);
 658
 659#ifdef CONFIG_PREEMPT
 660        /*
 661         * schedule_tail drops this_rq()->lock so we compensate with a count
 662         * of 1.  Also, we want to start with kernel preemption disabled.
 663         */
 664        p->thread_info->preempt_count = 1;
 665#endif
 666        p->did_exec = 0;
 667        p->swappable = 0;
 668        p->state = TASK_UNINTERRUPTIBLE;
 669
 670        copy_flags(clone_flags, p);
 671        if (clone_flags & CLONE_IDLETASK)
 672                p->pid = 0;
 673        else {
 674                p->pid = alloc_pidmap();
 675                if (p->pid == -1)
 676                        goto bad_fork_cleanup;
 677        }
 678        p->proc_dentry = NULL;
 679
 680        INIT_LIST_HEAD(&p->run_list);
 681
 682        INIT_LIST_HEAD(&p->children);
 683        INIT_LIST_HEAD(&p->sibling);
 684        init_waitqueue_head(&p->wait_chldexit);
 685        p->vfork_done = NULL;
 686        spin_lock_init(&p->alloc_lock);
 687        spin_lock_init(&p->switch_lock);
 688
 689        clear_tsk_thread_flag(p,TIF_SIGPENDING);
 690        init_sigpending(&p->pending);
 691
 692        p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
 693        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
 694        init_timer(&p->real_timer);
 695        p->real_timer.data = (unsigned long) p;
 696
 697        p->leader = 0;          /* session leadership doesn't inherit */
 698        p->tty_old_pgrp = 0;
 699        p->utime = p->stime = 0;
 700        p->cutime = p->cstime = 0;
 701#ifdef CONFIG_SMP
 702        {
 703                int i;
 704
 705                /* ?? should we just memset this ?? */
 706                for(i = 0; i < NR_CPUS; i++)
 707                        p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
 708                spin_lock_init(&p->sigmask_lock);
 709        }
 710#endif
 711        p->array = NULL;
 712        p->lock_depth = -1;             /* -1 = no lock */
 713        p->start_time = jiffies;
 714        p->security = NULL;
 715
 716        INIT_LIST_HEAD(&p->local_pages);
 717
 718        retval = -ENOMEM;
 719        if (security_ops->task_alloc_security(p))
 720                goto bad_fork_cleanup;
 721        /* copy all the process information */
 722        if (copy_semundo(clone_flags, p))
 723                goto bad_fork_cleanup_security;
 724        if (copy_files(clone_flags, p))
 725                goto bad_fork_cleanup_semundo;
 726        if (copy_fs(clone_flags, p))
 727                goto bad_fork_cleanup_files;
 728        if (copy_sighand(clone_flags, p))
 729                goto bad_fork_cleanup_fs;
 730        if (copy_mm(clone_flags, p))
 731                goto bad_fork_cleanup_sighand;
 732        if (copy_namespace(clone_flags, p))
 733                goto bad_fork_cleanup_mm;
 734        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 735        if (retval)
 736                goto bad_fork_cleanup_namespace;
 737        /*
 738         * Notify the child of the TID?
 739         */
 740        retval = -EFAULT;
 741        if (clone_flags & CLONE_SETTID)
 742                if (put_user(p->pid, user_tid))
 743                        goto bad_fork_cleanup_namespace;
 744
 745        /*
 746         * Does the userspace VM want the TID cleared on mm_release()?
 747         */
 748        if (clone_flags & CLONE_CLEARTID)
 749                p->user_tid = user_tid;
 750
 751        /*
 752         * Syscall tracing should be turned off in the child regardless
 753         * of CLONE_PTRACE.
 754         */
 755        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 756
 757        /* Our parent execution domain becomes current domain
 758           These must match for thread signalling to apply */
 759           
 760        p->parent_exec_id = p->self_exec_id;
 761
 762        /* ok, now we should be set up.. */
 763        p->swappable = 1;
 764        if (clone_flags & CLONE_DETACHED)
 765                p->exit_signal = -1;
 766        else
 767                p->exit_signal = clone_flags & CSIGNAL;
 768        p->pdeath_signal = 0;
 769
 770        /*
 771         * Share the timeslice between parent and child, thus the
 772         * total amount of pending timeslices in the system doesnt change,
 773         * resulting in more scheduling fairness.
 774         */
 775        local_irq_disable();
 776        p->time_slice = (current->time_slice + 1) >> 1;
 777        /*
 778         * The remainder of the first timeslice might be recovered by
 779         * the parent if the child exits early enough.
 780         */
 781        p->first_time_slice = 1;
 782        current->time_slice >>= 1;
 783        p->sleep_timestamp = jiffies;
 784        if (!current->time_slice) {
 785                /*
 786                 * This case is rare, it happens when the parent has only
 787                 * a single jiffy left from its timeslice. Taking the
 788                 * runqueue lock is not a problem.
 789                 */
 790                current->time_slice = 1;
 791                preempt_disable();
 792                scheduler_tick(0, 0);
 793                local_irq_enable();
 794                preempt_enable();
 795        } else
 796                local_irq_enable();
 797        /*
 798         * Ok, add it to the run-queues and make it
 799         * visible to the rest of the system.
 800         *
 801         * Let it rip!
 802         */
 803        p->tgid = p->pid;
 804        p->group_leader = p;
 805        INIT_LIST_HEAD(&p->thread_group);
 806        INIT_LIST_HEAD(&p->ptrace_children);
 807        INIT_LIST_HEAD(&p->ptrace_list);
 808
 809        /* Need tasklist lock for parent etc handling! */
 810        write_lock_irq(&tasklist_lock);
 811
 812        /* CLONE_PARENT re-uses the old parent */
 813        if (clone_flags & CLONE_PARENT)
 814                p->real_parent = current->real_parent;
 815        else
 816                p->real_parent = current;
 817        p->parent = p->real_parent;
 818
 819        if (clone_flags & CLONE_THREAD) {
 820                spin_lock(&current->sig->siglock);
 821                /*
 822                 * Important: if an exit-all has been started then
 823                 * do not create this new thread - the whole thread
 824                 * group is supposed to exit anyway.
 825                 */
 826                if (current->sig->group_exit) {
 827                        spin_unlock(&current->sig->siglock);
 828                        write_unlock_irq(&tasklist_lock);
 829                        goto bad_fork_cleanup_namespace;
 830                }
 831                p->tgid = current->tgid;
 832                p->group_leader = current->group_leader;
 833                list_add(&p->thread_group, &current->thread_group);
 834                spin_unlock(&current->sig->siglock);
 835        }
 836
 837        SET_LINKS(p);
 838        if (p->ptrace & PT_PTRACED)
 839                __ptrace_link(p, current->parent);
 840
 841        attach_pid(p, PIDTYPE_PID, p->pid);
 842        if (thread_group_leader(p)) {
 843                attach_pid(p, PIDTYPE_PGID, p->pgrp);
 844                attach_pid(p, PIDTYPE_SID, p->session);
 845        }
 846
 847        nr_threads++;
 848        write_unlock_irq(&tasklist_lock);
 849        retval = 0;
 850
 851fork_out:
 852        if (retval)
 853                return ERR_PTR(retval);
 854        return p;
 855
 856bad_fork_cleanup_namespace:
 857        exit_namespace(p);
 858bad_fork_cleanup_mm:
 859        exit_mm(p);
 860bad_fork_cleanup_sighand:
 861        exit_sighand(p);
 862bad_fork_cleanup_fs:
 863        exit_fs(p); /* blocking */
 864bad_fork_cleanup_files:
 865        exit_files(p); /* blocking */
 866bad_fork_cleanup_semundo:
 867        exit_semundo(p);
 868bad_fork_cleanup_security:
 869        security_ops->task_free_security(p);
 870bad_fork_cleanup:
 871        if (p->pid > 0)
 872                free_pidmap(p->pid);
 873        put_exec_domain(p->thread_info->exec_domain);
 874        if (p->binfmt && p->binfmt->module)
 875                __MOD_DEC_USE_COUNT(p->binfmt->module);
 876bad_fork_cleanup_count:
 877        atomic_dec(&p->user->processes);
 878        free_uid(p->user);
 879bad_fork_free:
 880        put_task_struct(p);
 881        goto fork_out;
 882}
 883
 884/*
 885 *  Ok, this is the main fork-routine.
 886 *
 887 * It copies the process, and if successful kick-starts
 888 * it and waits for it to finish using the VM if required.
 889 */
 890struct task_struct *do_fork(unsigned long clone_flags,
 891                            unsigned long stack_start,
 892                            struct pt_regs *regs,
 893                            unsigned long stack_size,
 894                            int *user_tid)
 895{
 896        struct task_struct *p;
 897
 898        p = copy_process(clone_flags, stack_start, regs, stack_size, user_tid);
 899        if (!IS_ERR(p)) {
 900                struct completion vfork;
 901
 902                if (clone_flags & CLONE_VFORK) {
 903                        p->vfork_done = &vfork;
 904                        init_completion(&vfork);
 905                }
 906
 907                if (p->ptrace & PT_PTRACED)
 908                        send_sig(SIGSTOP, p, 1);
 909
 910                wake_up_forked_process(p);              /* do this last */
 911                ++total_forks;
 912                if (clone_flags & CLONE_VFORK)
 913                        wait_for_completion(&vfork);
 914                else
 915                        /*
 916                         * Let the child process run first, to avoid most of the
 917                         * COW overhead when the child exec()s afterwards.
 918                         */
 919                        set_need_resched();
 920        }
 921        return p;
 922}
 923
 924/* SLAB cache for signal_struct structures (tsk->sig) */
 925kmem_cache_t *sigact_cachep;
 926
 927/* SLAB cache for files_struct structures (tsk->files) */
 928kmem_cache_t *files_cachep;
 929
 930/* SLAB cache for fs_struct structures (tsk->fs) */
 931kmem_cache_t *fs_cachep;
 932
 933/* SLAB cache for vm_area_struct structures */
 934kmem_cache_t *vm_area_cachep;
 935
 936/* SLAB cache for mm_struct structures (tsk->mm) */
 937kmem_cache_t *mm_cachep;
 938
 939void __init proc_caches_init(void)
 940{
 941        sigact_cachep = kmem_cache_create("signal_act",
 942                        sizeof(struct signal_struct), 0,
 943                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 944        if (!sigact_cachep)
 945                panic("Cannot create signal action SLAB cache");
 946
 947        files_cachep = kmem_cache_create("files_cache", 
 948                         sizeof(struct files_struct), 0, 
 949                         SLAB_HWCACHE_ALIGN, NULL, NULL);
 950        if (!files_cachep) 
 951                panic("Cannot create files SLAB cache");
 952
 953        fs_cachep = kmem_cache_create("fs_cache", 
 954                         sizeof(struct fs_struct), 0, 
 955                         SLAB_HWCACHE_ALIGN, NULL, NULL);
 956        if (!fs_cachep) 
 957                panic("Cannot create fs_struct SLAB cache");
 958 
 959        vm_area_cachep = kmem_cache_create("vm_area_struct",
 960                        sizeof(struct vm_area_struct), 0,
 961                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 962        if(!vm_area_cachep)
 963                panic("vma_init: Cannot alloc vm_area_struct SLAB cache");
 964
 965        mm_cachep = kmem_cache_create("mm_struct",
 966                        sizeof(struct mm_struct), 0,
 967                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 968        if(!mm_cachep)
 969                panic("vma_init: Cannot alloc mm_struct SLAB cache");
 970}
 971
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.