linux-old/kernel/fork.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/fork.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 *  'fork.c' contains the help-routines for the 'fork' system call
   9 * (see also entry.S and others).
  10 * Fork is rather simple, once you get the hang of it, but the memory
  11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
  12 */
  13
  14#include <linux/config.h>
  15#include <linux/slab.h>
  16#include <linux/init.h>
  17#include <linux/unistd.h>
  18#include <linux/smp_lock.h>
  19#include <linux/module.h>
  20#include <linux/vmalloc.h>
  21#include <linux/completion.h>
  22#include <linux/namespace.h>
  23#include <linux/personality.h>
  24#include <linux/compiler.h>
  25
  26#include <asm/pgtable.h>
  27#include <asm/pgalloc.h>
  28#include <asm/uaccess.h>
  29#include <asm/mmu_context.h>
  30#include <asm/processor.h>
  31
/* The idle threads do not count.. */
int nr_threads;         /* bumped by do_fork() once the new task is linked in */
int nr_running;

/* Ceiling for nr_threads; the default is computed by fork_init(). */
int max_threads;
unsigned long total_forks;      /* Handle normal Linux uptimes. */
int last_pid;           /* last pid handed out by get_pid(); see lastpid_lock */

/* Heads of the pid hash buckets (presumably filled through hash_pid()). */
struct task_struct *pidhash[PIDHASH_SZ];
  41
  42void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
  43{
  44        unsigned long flags;
  45
  46        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
  47        wq_write_lock_irqsave(&q->lock, flags);
  48        __add_wait_queue(q, wait);
  49        wq_write_unlock_irqrestore(&q->lock, flags);
  50}
  51
  52void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
  53{
  54        unsigned long flags;
  55
  56        wait->flags |= WQ_FLAG_EXCLUSIVE;
  57        wq_write_lock_irqsave(&q->lock, flags);
  58        __add_wait_queue_tail(q, wait);
  59        wq_write_unlock_irqrestore(&q->lock, flags);
  60}
  61
  62void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
  63{
  64        unsigned long flags;
  65
  66        wq_write_lock_irqsave(&q->lock, flags);
  67        __remove_wait_queue(q, wait);
  68        wq_write_unlock_irqrestore(&q->lock, flags);
  69}
  70
  71void __init fork_init(unsigned long mempages)
  72{
  73        /*
  74         * The default maximum number of threads is set to a safe
  75         * value: the thread structures can take up at most half
  76         * of memory.
  77         */
  78        max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
  79
  80        init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  81        init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  82}
  83
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;

/*
 * get_pid - choose the pid for a task that is being created.
 * @flags: clone flags; with CLONE_PID the caller's own pid is reused.
 *
 * Advances last_pid and, when necessary, walks the task list to make
 * sure the candidate does not collide with any task's pid, pgrp, tgid
 * or session.  next_safe caches the smallest such id above last_pid
 * seen by the most recent walk, so candidates below it are accepted
 * without walking the list again.
 *
 * Returns the chosen pid, or 0 when the search wrapped all the way
 * round to its starting value without finding a free id.
 */
static int get_pid(unsigned long flags)
{
        static int next_safe = PID_MAX;
        struct task_struct *p;
        int pid, beginpid;

        if (flags & CLONE_PID)
                return current->pid;

        spin_lock(&lastpid_lock);
        /* Remember where we started so a full wrap-around can be detected. */
        beginpid = last_pid;
        /* Any bit set in 0xffff8000 means the counter left the 0..32767 range. */
        if((++last_pid) & 0xffff8000) {
                last_pid = 300;         /* Skip daemons etc. */
                /* After wrapping, the cached next_safe is stale: force a scan. */
                goto inside;
        }
        if(last_pid >= next_safe) {
inside:
                next_safe = PID_MAX;
                read_lock(&tasklist_lock);
        repeat:
                for_each_task(p) {
                        /* Candidate is taken: try the next one and rescan from the top. */
                        if(p->pid == last_pid   ||
                           p->pgrp == last_pid  ||
                           p->tgid == last_pid  ||
                           p->session == last_pid) {
                                if(++last_pid >= next_safe) {
                                        if(last_pid & 0xffff8000)
                                                last_pid = 300;
                                        next_safe = PID_MAX;
                                }
                                if(unlikely(last_pid == beginpid)) {
                                        /* 0 makes the next call rescan immediately. */
                                        next_safe = 0;
                                        goto nomorepids;
                                }
                                goto repeat;
                        }
                        /* Track the lowest in-use id above the candidate. */
                        if(p->pid > last_pid && next_safe > p->pid)
                                next_safe = p->pid;
                        if(p->pgrp > last_pid && next_safe > p->pgrp)
                                next_safe = p->pgrp;
                        if(p->tgid > last_pid && next_safe > p->tgid)
                                next_safe = p->tgid;
                        if(p->session > last_pid && next_safe > p->session)
                                next_safe = p->session;
                }
                read_unlock(&tasklist_lock);
        }
        pid = last_pid;
        spin_unlock(&lastpid_lock);

        return pid;

nomorepids:
        /* Reached only from inside the scan, so both locks are held here. */
        read_unlock(&tasklist_lock);
        spin_unlock(&lastpid_lock);
        return 0;
}
 144
/*
 * dup_mmap - populate a freshly initialised mm with copies of the
 * current task's vmas.
 * @mm: the new mm; its bookkeeping fields are reset here before use.
 *
 * copy_mm() calls this with the parent's mmap_sem held for writing.
 * Areas flagged VM_DONTCOPY are skipped; every other vma is duplicated,
 * loses VM_LOCKED, is chained onto @mm and has its page table entries
 * copied with copy_page_range().
 *
 * Returns 0 on success, -ENOMEM when a vma cannot be allocated, or the
 * non-zero result of copy_page_range().  Nothing is unwound here on
 * failure; copy_mm() releases the partially built mm with mmput().
 */
static inline int dup_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt, *tmp, **pprev;
        int retval;

        flush_cache_mm(current->mm);
        /* These fields were copied from the parent by the caller's memcpy. */
        mm->locked_vm = 0;
        mm->mmap = NULL;
        mm->mmap_cache = NULL;
        mm->map_count = 0;
        mm->rss = 0;
        mm->cpu_vm_mask = 0;
        mm->swap_address = 0;
        /* pprev always points at the link where the next vma gets attached. */
        pprev = &mm->mmap;

        /*
         * Add it to the mmlist after the parent.
         * Doing it this way means that we can order the list,
         * and fork() won't mess up the ordering significantly.
         * Add it first so that swapoff can see any swap entries.
         */
        spin_lock(&mmlist_lock);
        list_add(&mm->mmlist, &current->mm->mmlist);
        mmlist_nr++;
        spin_unlock(&mmlist_lock);

        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
                struct file *file;

                retval = -ENOMEM;
                if(mpnt->vm_flags & VM_DONTCOPY)
                        continue;
                tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_dentry->d_inode;
                        /* The copied vma holds its own reference on the file. */
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);

                        /* insert tmp into the share list, just after mpnt */
                        spin_lock(&inode->i_mapping->i_shared_lock);
                        if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
                                mpnt->vm_next_share->vm_pprev_share =
                                        &tmp->vm_next_share;
                        mpnt->vm_next_share = tmp;
                        tmp->vm_pprev_share = &mpnt->vm_next_share;
                        spin_unlock(&inode->i_mapping->i_shared_lock);
                }

                /*
                 * Link in the new vma and copy the page table entries:
                 * link in first so that swapoff can see swap entries.
                 */
                spin_lock(&mm->page_table_lock);
                *pprev = tmp;
                pprev = &tmp->vm_next;
                mm->map_count++;
                retval = copy_page_range(mm, current->mm, tmp);
                spin_unlock(&mm->page_table_lock);

                /*
                 * open() runs even when copy_page_range() failed: the vma
                 * is already linked into mm at this point.
                 * NOTE(review): presumably this keeps open/close balanced
                 * for the later teardown -- confirm against exit_mmap().
                 */
                if (tmp->vm_ops && tmp->vm_ops->open)
                        tmp->vm_ops->open(tmp);

                if (retval)
                        goto fail_nomem;
        }
        retval = 0;
        build_mmap_rb(mm);

fail_nomem:
        /* Reached on success as well as on failure. */
        flush_tlb_mm(current->mm);
        return retval;
}
 225
/*
 * mmlist_lock is taken around every update of an mm's mmlist linkage
 * and of mmlist_nr in this file (see dup_mmap() and mmput()).
 */
spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
/* Count of mm_structs on the mmlist: ++ in dup_mmap(), -- in mmput(). */
int mmlist_nr;

/* Raw mm_struct allocation/free from the mm_cachep slab cache. */
#define allocate_mm()   (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))
 231
 232static struct mm_struct * mm_init(struct mm_struct * mm)
 233{
 234        atomic_set(&mm->mm_users, 1);
 235        atomic_set(&mm->mm_count, 1);
 236        init_rwsem(&mm->mmap_sem);
 237        mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 238        mm->pgd = pgd_alloc(mm);
 239        mm->def_flags = 0;
 240        if (mm->pgd)
 241                return mm;
 242        free_mm(mm);
 243        return NULL;
 244}
 245        
 246
 247/*
 248 * Allocate and initialize an mm_struct.
 249 */
 250struct mm_struct * mm_alloc(void)
 251{
 252        struct mm_struct * mm;
 253
 254        mm = allocate_mm();
 255        if (mm) {
 256                memset(mm, 0, sizeof(*mm));
 257                return mm_init(mm);
 258        }
 259        return NULL;
 260}
 261
 262/*
 263 * Called when the last reference to the mm
 264 * is dropped: either by a lazy thread or by
 265 * mmput. Free the page directory and the mm.
 266 */
 267void fastcall __mmdrop(struct mm_struct *mm)
 268{
 269        BUG_ON(mm == &init_mm);
 270        pgd_free(mm->pgd);
 271        check_pgt_cache();
 272        destroy_context(mm);
 273        free_mm(mm);
 274}
 275
 276/*
 277 * Decrement the use count and release all resources for an mm.
 278 */
 279void mmput(struct mm_struct *mm)
 280{
 281        if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
 282                extern struct mm_struct *swap_mm;
 283                if (swap_mm == mm)
 284                        swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
 285                list_del(&mm->mmlist);
 286                mmlist_nr--;
 287                spin_unlock(&mmlist_lock);
 288                exit_mmap(mm);
 289                mmdrop(mm);
 290        }
 291}
 292
 293/* Please note the differences between mmput and mm_release.
 294 * mmput is called whenever we stop holding onto a mm_struct,
 295 * error success whatever.
 296 *
 297 * mm_release is called after a mm_struct has been removed
 298 * from the current process.
 299 *
 300 * This difference is important for error handling, when we
 301 * only half set up a mm_struct for a new process and need to restore
 302 * the old one.  Because we mmput the new mm_struct before
 303 * restoring the old one. . .
 304 * Eric Biederman 10 January 1998
 305 */
 306void mm_release(void)
 307{
 308        struct task_struct *tsk = current;
 309        struct completion *vfork_done = tsk->vfork_done;
 310
 311        /* notify parent sleeping on vfork() */
 312        if (vfork_done) {
 313                tsk->vfork_done = NULL;
 314                complete(vfork_done);
 315        }
 316}
 317
 318static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 319{
 320        struct mm_struct * mm, *oldmm;
 321        int retval;
 322
 323        tsk->min_flt = tsk->maj_flt = 0;
 324        tsk->cmin_flt = tsk->cmaj_flt = 0;
 325        tsk->nswap = tsk->cnswap = 0;
 326
 327        tsk->mm = NULL;
 328        tsk->active_mm = NULL;
 329
 330        /*
 331         * Are we cloning a kernel thread?
 332         *
 333         * We need to steal a active VM for that..
 334         */
 335        oldmm = current->mm;
 336        if (!oldmm)
 337                return 0;
 338
 339        if (clone_flags & CLONE_VM) {
 340                atomic_inc(&oldmm->mm_users);
 341                mm = oldmm;
 342                goto good_mm;
 343        }
 344
 345        retval = -ENOMEM;
 346        mm = allocate_mm();
 347        if (!mm)
 348                goto fail_nomem;
 349
 350        /* Copy the current MM stuff.. */
 351        memcpy(mm, oldmm, sizeof(*mm));
 352        if (!mm_init(mm))
 353                goto fail_nomem;
 354
 355        if (init_new_context(tsk,mm))
 356                goto free_pt;
 357
 358        down_write(&oldmm->mmap_sem);
 359        retval = dup_mmap(mm);
 360        up_write(&oldmm->mmap_sem);
 361
 362        if (retval)
 363                goto free_pt;
 364
 365        /*
 366         * child gets a private LDT (if there was an LDT in the parent)
 367         */
 368        copy_segments(tsk, mm);
 369
 370good_mm:
 371        tsk->mm = mm;
 372        tsk->active_mm = mm;
 373        return 0;
 374
 375free_pt:
 376        mmput(mm);
 377fail_nomem:
 378        return retval;
 379}
 380
 381static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 382{
 383        struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 384        /* We don't need to lock fs - think why ;-) */
 385        if (fs) {
 386                atomic_set(&fs->count, 1);
 387                fs->lock = RW_LOCK_UNLOCKED;
 388                fs->umask = old->umask;
 389                read_lock(&old->lock);
 390                fs->rootmnt = mntget(old->rootmnt);
 391                fs->root = dget(old->root);
 392                fs->pwdmnt = mntget(old->pwdmnt);
 393                fs->pwd = dget(old->pwd);
 394                if (old->altroot) {
 395                        fs->altrootmnt = mntget(old->altrootmnt);
 396                        fs->altroot = dget(old->altroot);
 397                } else {
 398                        fs->altrootmnt = NULL;
 399                        fs->altroot = NULL;
 400                }       
 401                read_unlock(&old->lock);
 402        }
 403        return fs;
 404}
 405
 406struct fs_struct *copy_fs_struct(struct fs_struct *old)
 407{
 408        return __copy_fs_struct(old);
 409}
 410
 411static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 412{
 413        if (clone_flags & CLONE_FS) {
 414                atomic_inc(&current->fs->count);
 415                return 0;
 416        }
 417        tsk->fs = __copy_fs_struct(current->fs);
 418        if (!tsk->fs)
 419                return -1;
 420        return 0;
 421}
 422
 423static int count_open_files(struct files_struct *files, int size)
 424{
 425        int i;
 426        
 427        /* Find the last open fd */
 428        for (i = size/(8*sizeof(long)); i > 0; ) {
 429                if (files->open_fds->fds_bits[--i])
 430                        break;
 431        }
 432        i = (i+1) * 8 * sizeof(long);
 433        return i;
 434}
 435
/*
 * copy_files - set up the open-file table of @tsk.
 * @clone_flags: with CLONE_FILES the current table is shared (its count
 *               is raised), otherwise @tsk gets a private copy.
 * @tsk: target task; unshare_files() passes current itself.
 *
 * Returns 0 on success (also when current has no file table at all),
 * -ENOMEM when the new table cannot be allocated, or the error from
 * expand_fdset()/expand_fd_array().  On the copy path tsk->files is
 * set to NULL before anything can fail.
 */
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
        struct files_struct *oldf, *newf;
        struct file **old_fds, **new_fds;
        int open_files, nfds, size, i, error = 0;

        /*
         * A background process may not have any files ...
         */
        oldf = current->files;
        if (!oldf)
                goto out;

        if (clone_flags & CLONE_FILES) {
                atomic_inc(&oldf->count);
                goto out;
        }

        /*
         * Note: we may be using current for both targets (See exec.c)
         * This works because we cache current->files (old) as oldf. Don't
         * break this.
         */
        tsk->files = NULL;
        error = -ENOMEM;
        newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
        if (!newf) 
                goto out;

        atomic_set(&newf->count, 1);

        /* Start out on the arrays and bitmaps embedded in files_struct. */
        newf->file_lock     = RW_LOCK_UNLOCKED;
        newf->next_fd       = 0;
        newf->max_fds       = NR_OPEN_DEFAULT;
        newf->max_fdset     = __FD_SETSIZE;
        newf->close_on_exec = &newf->close_on_exec_init;
        newf->open_fds      = &newf->open_fds_init;
        newf->fd            = &newf->fd_array[0];

        /* We don't yet have the oldf readlock, but even if the old
           fdset gets grown now, we'll only copy up to "size" fds */
        size = oldf->max_fdset;
        if (size > __FD_SETSIZE) {
                newf->max_fdset = 0;
                write_lock(&newf->file_lock);
                error = expand_fdset(newf, size-1);
                write_unlock(&newf->file_lock);
                if (error)
                        goto out_release;
        }
        read_lock(&oldf->file_lock);

        open_files = count_open_files(oldf, size);

        /*
         * Check whether we need to allocate a larger fd array.
         * Note: we're not a clone task, so the open count won't
         * change.
         */
        nfds = NR_OPEN_DEFAULT;
        if (open_files > nfds) {
                /* The old table's lock is dropped while the new array is grown. */
                read_unlock(&oldf->file_lock);
                newf->max_fds = 0;
                write_lock(&newf->file_lock);
                error = expand_fd_array(newf, open_files-1);
                write_unlock(&newf->file_lock);
                if (error) 
                        goto out_release;
                nfds = newf->max_fds;
                read_lock(&oldf->file_lock);
        }

        old_fds = oldf->fd;
        new_fds = newf->fd;

        /* open_files is a bit count, so open_files/8 is the byte count. */
        memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
        memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

        /* Copy the file pointers, taking a reference on every open file. */
        for (i = open_files; i != 0; i--) {
                struct file *f = *old_fds++;
                if (f) {
                        get_file(f);
                } else {
                        /*
                         * The fd may be claimed in the fd bitmap but not yet
                         * instantiated in the files array if a sibling thread
                         * is partway through open().  So make sure that this
                         * fd is available to the new process.
                         */
                        FD_CLR(open_files - i, newf->open_fds);
                }
                *new_fds++ = f;
        }
        read_unlock(&oldf->file_lock);

        /* compute the remainder to be cleared */
        size = (newf->max_fds - open_files) * sizeof(struct file *);

        /* This is long word aligned thus could use a optimized version */ 
        memset(new_fds, 0, size); 

        /* Zero the part of both bitmaps that lies beyond the copied bits. */
        if (newf->max_fdset > open_files) {
                int left = (newf->max_fdset-open_files)/8;
                int start = open_files / (8 * sizeof(unsigned long));
                
                memset(&newf->open_fds->fds_bits[start], 0, left);
                memset(&newf->close_on_exec->fds_bits[start], 0, left);
        }

        tsk->files = newf;
        error = 0;
out:
        return error;

out_release:
        /* Reached after a failed expand_fdset() or expand_fd_array(). */
        free_fdset (newf->close_on_exec, newf->max_fdset);
        free_fdset (newf->open_fds, newf->max_fdset);
        kmem_cache_free(files_cachep, newf);
        goto out;
}
 556
 557/*
 558 *      Helper to unshare the files of the current task. 
 559 *      We don't want to expose copy_files internals to 
 560 *      the exec layer of the kernel.
 561 */
 562
 563int unshare_files(void)
 564{
 565        struct files_struct *files  = current->files;
 566        int rc;
 567        
 568        if(!files)
 569                BUG();
 570                
 571        /* This can race but the race causes us to copy when we don't
 572           need to and drop the copy */
 573        if(atomic_read(&files->count) == 1)
 574        {
 575                atomic_inc(&files->count);
 576                return 0;
 577        }
 578        rc = copy_files(0, current);
 579        if(rc)
 580                current->files = files;
 581        return rc;
 582}               
 583
 584static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 585{
 586        struct signal_struct *sig;
 587
 588        if (clone_flags & CLONE_SIGHAND) {
 589                atomic_inc(&current->sig->count);
 590                return 0;
 591        }
 592        sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
 593        tsk->sig = sig;
 594        if (!sig)
 595                return -1;
 596        spin_lock_init(&sig->siglock);
 597        atomic_set(&sig->count, 1);
 598        memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
 599        return 0;
 600}
 601
 602static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 603{
 604        unsigned long new_flags = p->flags;
 605
 606        new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU);
 607        new_flags |= PF_FORKNOEXEC;
 608        if (!(clone_flags & CLONE_PTRACE))
 609                p->ptrace = 0;
 610        p->flags = new_flags;
 611}
 612
 613long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 614{
 615        struct task_struct *task = current;
 616        unsigned old_task_dumpable;
 617        long ret;
 618
 619        /* lock out any potential ptracer */
 620        task_lock(task);
 621        if (task->ptrace) {
 622                task_unlock(task);
 623                return -EPERM;
 624        }
 625
 626        old_task_dumpable = task->task_dumpable;
 627        task->task_dumpable = 0;
 628        task_unlock(task);
 629
 630        ret = arch_kernel_thread(fn, arg, flags);
 631
 632        /* never reached in child process, only in parent */
 633        current->task_dumpable = old_task_dumpable;
 634
 635        return ret;
 636}
 637
/*
 *  Ok, this is the main fork-routine. It copies the system process
 * information (the task_struct of the caller) and sets up the necessary
 * registers. It also copies the data segment in its entirety.  The
 * "stack_start" and "stack_size" arguments are simply passed along to
 * the platform specific copy_thread() routine (older versions of this
 * comment called the second one "stack_top" and noted that most
 * platforms ignore it; for a platform that uses it, see
 * arch/ia64/kernel/process.c).
 *
 * Returns the pid of the new task on success.  On failure:
 *   -EINVAL  CLONE_NEWNS and CLONE_FS were both requested
 *   -EPERM   CLONE_PID used by a task whose pid is not 0
 *   -ENOMEM  no task_struct, or copying files/fs/sighand/mm failed
 *            (the individual return codes of those helpers are not
 *            propagated)
 *   -EAGAIN  per-user process limit or max_threads reached, or
 *            get_pid() found no free pid
 *   or the error returned by copy_namespace()/copy_thread().
 */
int do_fork(unsigned long clone_flags, unsigned long stack_start,
            struct pt_regs *regs, unsigned long stack_size)
{
        int retval;
        struct task_struct *p;
        /* Lives on the parent's stack; the parent waits on it below. */
        struct completion vfork;

        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return -EINVAL;

        retval = -EPERM;

        /* 
         * CLONE_PID is only allowed for the initial SMP swapper
         * calls
         */
        if (clone_flags & CLONE_PID) {
                if (current->pid)
                        goto fork_out;
        }

        retval = -ENOMEM;
        p = alloc_task_struct();
        if (!p)
                goto fork_out;

        /* Start from a verbatim copy of the parent; fields are fixed up below. */
        *p = *current;

        retval = -EAGAIN;
        /*
         * Check if we are over our maximum process limit, but be sure to
         * exclude root. This is needed to make it possible for login and
         * friends to set the per-user process limit to something lower
         * than the amount of processes root is running. -- Rik
         */
        if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
                      && p->user != &root_user
                      && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
                goto bad_fork_free;

        atomic_inc(&p->user->__count);
        atomic_inc(&p->user->processes);

        /*
         * Counter increases are protected by
         * the kernel lock so nr_threads can't
         * increase under us (but it may decrease).
         */
        if (nr_threads >= max_threads)
                goto bad_fork_cleanup_count;
        
        get_exec_domain(p->exec_domain);

        if (p->binfmt && p->binfmt->module)
                __MOD_INC_USE_COUNT(p->binfmt->module);

        p->did_exec = 0;
        p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;

        copy_flags(clone_flags, p);
        p->pid = get_pid(clone_flags);
        /* get_pid() returns 0 when no pid is free; retval is still -EAGAIN. */
        if (p->pid == 0 && current->pid != 0)
                goto bad_fork_cleanup;

        p->run_list.next = NULL;
        p->run_list.prev = NULL;

        p->p_cptr = NULL;
        init_waitqueue_head(&p->wait_chldexit);
        p->vfork_done = NULL;
        if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
                init_completion(&vfork);
        }
        spin_lock_init(&p->alloc_lock);

        p->sigpending = 0;
        init_sigpending(&p->pending);

        p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
        init_timer(&p->real_timer);
        p->real_timer.data = (unsigned long) p;

        p->leader = 0;          /* session leadership doesn't inherit */
        p->tty_old_pgrp = 0;
        p->times.tms_utime = p->times.tms_stime = 0;
        p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef CONFIG_SMP
        {
                int i;
                p->cpus_runnable = ~0UL;
                p->processor = current->processor;
                /* ?? should we just memset this ?? */
                for(i = 0; i < smp_num_cpus; i++)
                        p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
                spin_lock_init(&p->sigmask_lock);
        }
#endif
        p->lock_depth = -1;             /* -1 = no lock */
        p->start_time = jiffies;

        INIT_LIST_HEAD(&p->local_pages);

        /*
         * The first four helpers only report success or failure as far
         * as this function is concerned: any failure becomes -ENOMEM.
         */
        retval = -ENOMEM;
        /* copy all the process information */
        if (copy_files(clone_flags, p))
                goto bad_fork_cleanup;
        if (copy_fs(clone_flags, p))
                goto bad_fork_cleanup_files;
        if (copy_sighand(clone_flags, p))
                goto bad_fork_cleanup_fs;
        if (copy_mm(clone_flags, p))
                goto bad_fork_cleanup_sighand;
        retval = copy_namespace(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_mm;
        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
        if (retval)
                goto bad_fork_cleanup_namespace;
        p->semundo = NULL;
        
        /* Our parent execution domain becomes current domain
           These must match for thread signalling to apply */
           
        p->parent_exec_id = p->self_exec_id;

        /* ok, now we should be set up.. */
        p->swappable = 1;
        p->exit_signal = clone_flags & CSIGNAL;
        p->pdeath_signal = 0;

        /*
         * "share" dynamic priority between parent and child, thus the
         * total amount of dynamic priorities in the system doesn't change,
         * more scheduling fairness. This is only important in the first
         * timeslice, on the long run the scheduling behaviour is unchanged.
         */
        p->counter = (current->counter + 1) >> 1;
        current->counter >>= 1;
        if (!current->counter)
                current->need_resched = 1;

        /*
         * Ok, add it to the run-queues and make it
         * visible to the rest of the system.
         *
         * Let it rip!
         */
        /* From here on retval holds the value returned to the parent. */
        retval = p->pid;
        p->tgid = retval;
        INIT_LIST_HEAD(&p->thread_group);

        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);

        /* CLONE_PARENT re-uses the old parent */
        p->p_opptr = current->p_opptr;
        p->p_pptr = current->p_pptr;
        if (!(clone_flags & CLONE_PARENT)) {
                p->p_opptr = current;
                if (!(p->ptrace & PT_PTRACED))
                        p->p_pptr = current;
        }

        /* Threads of one group share the tgid of the creating task. */
        if (clone_flags & CLONE_THREAD) {
                p->tgid = current->tgid;
                list_add(&p->thread_group, &current->thread_group);
        }

        SET_LINKS(p);
        hash_pid(p);
        nr_threads++;
        write_unlock_irq(&tasklist_lock);

        if (p->ptrace & PT_PTRACED)
                send_sig(SIGSTOP, p, 1);

        wake_up_process(p);             /* do this last */
        ++total_forks;
        /* vfork: wait until the child completes vfork_done (see mm_release()). */
        if (clone_flags & CLONE_VFORK)
                wait_for_completion(&vfork);

fork_out:
        return retval;

        /*
         * Error unwinding: each label undoes one setup step and falls
         * through to the labels for the steps that preceded it.
         */
bad_fork_cleanup_namespace:
        exit_namespace(p);
bad_fork_cleanup_mm:
        exit_mm(p);
        if (p->active_mm)
                mmdrop(p->active_mm);
bad_fork_cleanup_sighand:
        exit_sighand(p);
bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
bad_fork_cleanup_files:
        exit_files(p); /* blocking */
bad_fork_cleanup:
        put_exec_domain(p->exec_domain);
        if (p->binfmt && p->binfmt->module)
                __MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
        atomic_dec(&p->user->processes);
        free_uid(p->user);
bad_fork_free:
        free_task_struct(p);
        goto fork_out;
}
 857
 858/* SLAB cache for signal_struct structures (tsk->sig) */
 859kmem_cache_t *sigact_cachep;
 860
 861/* SLAB cache for files_struct structures (tsk->files) */
 862kmem_cache_t *files_cachep;
 863
 864/* SLAB cache for fs_struct structures (tsk->fs) */
 865kmem_cache_t *fs_cachep;
 866
 867/* SLAB cache for vm_area_struct structures */
 868kmem_cache_t *vm_area_cachep;
 869
 870/* SLAB cache for mm_struct structures (tsk->mm) */
 871kmem_cache_t *mm_cachep;
 872
 873void __init proc_caches_init(void)
 874{
 875        sigact_cachep = kmem_cache_create("signal_act",
 876                        sizeof(struct signal_struct), 0,
 877                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 878        if (!sigact_cachep)
 879                panic("Cannot create signal action SLAB cache");
 880
 881        files_cachep = kmem_cache_create("files_cache", 
 882                         sizeof(struct files_struct), 0, 
 883                         SLAB_HWCACHE_ALIGN, NULL, NULL);
 884        if (!files_cachep) 
 885                panic("Cannot create files SLAB cache");
 886
 887        fs_cachep = kmem_cache_create("fs_cache", 
 888                         sizeof(struct fs_struct), 0, 
 889                         SLAB_HWCACHE_ALIGN, NULL, NULL);
 890        if (!fs_cachep) 
 891                panic("Cannot create fs_struct SLAB cache");
 892 
 893        vm_area_cachep = kmem_cache_create("vm_area_struct",
 894                        sizeof(struct vm_area_struct), 0,
 895                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 896        if(!vm_area_cachep)
 897                panic("vma_init: Cannot alloc vm_area_struct SLAB cache");
 898
 899        mm_cachep = kmem_cache_create("mm_struct",
 900                        sizeof(struct mm_struct), 0,
 901                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 902        if(!mm_cachep)
 903                panic("vma_init: Cannot alloc mm_struct SLAB cache");
 904}
 905
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.