linux-bk/fs/exec.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/exec.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 * #!-checking implemented by tytso.
   9 */
  10/*
  11 * Demand-loading implemented 01.12.91 - no need to read anything but
  12 * the header into memory. The inode of the executable is put into
  13 * "current->executable", and page faults do the actual loading. Clean.
  14 *
  15 * Once more I can proudly say that linux stood up to being changed: it
  16 * was less than 2 hours work to get demand-loading completely implemented.
  17 *
  18 * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
  19 * current->executable is only used by the procfs.  This allows a dispatch
  20 * table to check for several different types  of binary formats.  We keep
  21 * trying until we recognize the file or we run out of supported binary
  22 * formats. 
  23 */
  24
  25#include <linux/config.h>
  26#include <linux/slab.h>
  27#include <linux/file.h>
  28#include <linux/mman.h>
  29#include <linux/a.out.h>
  30#include <linux/stat.h>
  31#include <linux/fcntl.h>
  32#include <linux/smp_lock.h>
  33#include <linux/init.h>
  34#include <linux/pagemap.h>
  35#include <linux/highmem.h>
  36#include <linux/spinlock.h>
  37#include <linux/personality.h>
  38#include <linux/binfmts.h>
  39#include <linux/swap.h>
  40#define __NO_VERSION__
  41#include <linux/module.h>
  42#include <linux/namei.h>
  43#include <linux/proc_fs.h>
  44#include <linux/ptrace.h>
  45
  46#include <asm/uaccess.h>
  47#include <asm/pgalloc.h>
  48#include <asm/mmu_context.h>
  49
  50#ifdef CONFIG_KMOD
  51#include <linux/kmod.h>
  52#endif
  53
   /*
    * NOTE(review): core_uses_pid is not referenced in this part of the file;
    * presumably it is consulted by the core-dump code - confirm.
    */
   54int core_uses_pid;
   55
     /* Head of the singly linked list (via ->next) of registered binary formats. */
   56static struct linux_binfmt *formats;
     /* Guards 'formats': write-locked to add/remove entries, read-locked to walk it. */
   57static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
  58
  59int register_binfmt(struct linux_binfmt * fmt)
  60{
  61        struct linux_binfmt ** tmp = &formats;
  62
  63        if (!fmt)
  64                return -EINVAL;
  65        if (fmt->next)
  66                return -EBUSY;
  67        write_lock(&binfmt_lock);
  68        while (*tmp) {
  69                if (fmt == *tmp) {
  70                        write_unlock(&binfmt_lock);
  71                        return -EBUSY;
  72                }
  73                tmp = &(*tmp)->next;
  74        }
  75        fmt->next = formats;
  76        formats = fmt;
  77        write_unlock(&binfmt_lock);
  78        return 0;       
  79}
  80
  81int unregister_binfmt(struct linux_binfmt * fmt)
  82{
  83        struct linux_binfmt ** tmp = &formats;
  84
  85        write_lock(&binfmt_lock);
  86        while (*tmp) {
  87                if (fmt == *tmp) {
  88                        *tmp = fmt->next;
  89                        write_unlock(&binfmt_lock);
  90                        return 0;
  91                }
  92                tmp = &(*tmp)->next;
  93        }
  94        write_unlock(&binfmt_lock);
  95        return -EINVAL;
  96}
  97
  98static inline void put_binfmt(struct linux_binfmt * fmt)
  99{
 100        if (fmt->module)
 101                __MOD_DEC_USE_COUNT(fmt->module);
 102}
 103
 104/*
 105 * Note that a shared library must be both readable and executable due to
 106 * security reasons.
 107 *
 108 * Also note that we take the address to load from from the file itself.
 109 */
 110asmlinkage long sys_uselib(const char * library)
 111{
 112        struct file * file;
 113        struct nameidata nd;
 114        int error;
 115
 116        error = user_path_walk(library, &nd);
 117        if (error)
 118                goto out;
 119
 120        error = -EINVAL;
 121        if (!S_ISREG(nd.dentry->d_inode->i_mode))
 122                goto exit;
 123
 124        error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
 125        if (error)
 126                goto exit;
 127
 128        file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 129        error = PTR_ERR(file);
 130        if (IS_ERR(file))
 131                goto out;
 132
 133        error = -ENOEXEC;
 134        if(file->f_op && file->f_op->read) {
 135                struct linux_binfmt * fmt;
 136
 137                read_lock(&binfmt_lock);
 138                for (fmt = formats ; fmt ; fmt = fmt->next) {
 139                        if (!fmt->load_shlib)
 140                                continue;
 141                        if (!try_inc_mod_count(fmt->module))
 142                                continue;
 143                        read_unlock(&binfmt_lock);
 144                        error = fmt->load_shlib(file);
 145                        read_lock(&binfmt_lock);
 146                        put_binfmt(fmt);
 147                        if (error != -ENOEXEC)
 148                                break;
 149                }
 150                read_unlock(&binfmt_lock);
 151        }
 152        fput(file);
 153out:
 154        return error;
 155exit:
 156        path_release(&nd);
 157        goto out;
 158}
 159
 160/*
 161 * count() counts the number of strings in array ARGV.
 162 */
 163static int count(char ** argv, int max)
 164{
 165        int i = 0;
 166
 167        if (argv != NULL) {
 168                for (;;) {
 169                        char * p;
 170
 171                        if (get_user(p, argv))
 172                                return -EFAULT;
 173                        if (!p)
 174                                break;
 175                        argv++;
 176                        if(++i > max)
 177                                return -E2BIG;
 178                }
 179        }
 180        return i;
 181}
 182
  183/*
  184 * 'copy_strings()' copies argument/environment strings from user
  185 * memory to free pages in kernel mem. These are in a format ready
  186 * to be put directly into the top of new user memory.
      *
      * The strings are processed from argv[argc-1] down to argv[0].  Each one
      * is placed directly below the previous one by lowering bprm->p, and its
      * bytes are stored in bprm->page[], whose pages are allocated on demand.
      *
      * Returns 0 on success, -EFAULT if a pointer or string cannot be read
      * (or strnlen_user() reports 0), -E2BIG if a string is longer than the
      * bprm->p bytes still available, -ENOMEM if a page cannot be allocated.
  187 */
  188int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
  189{
  190        struct page *kmapped_page = NULL;
  191        char *kaddr = NULL;
  192        int ret;
  193
  194        while (argc-- > 0) {
  195                char *str;
  196                int len;
  197                unsigned long pos;
  198
                     /*
                      * NOTE(review): len is used as the number of bytes to
                      * copy, so strnlen_user() presumably counts the
                      * terminating NUL - confirm.
                      */
  199                if (get_user(str, argv+argc) ||
  200                                !(len = strnlen_user(str, bprm->p))) {
  201                        ret = -EFAULT;
  202                        goto out;
  203                }
  204
  205                if (bprm->p < len)  {
  206                        ret = -E2BIG;
  207                        goto out;
  208                }
  209
  210                bprm->p -= len;
  211                /* XXX: add architecture specific overflow check here. */ 
  212                pos = bprm->p;
  213
                     /* Copy the string piecewise, one page of bprm->page[] at a time. */
  214                while (len > 0) {
  215                        int i, new, err;
  216                        int offset, bytes_to_copy;
  217                        struct page *page;
  218
  219                        offset = pos % PAGE_SIZE;
  220                        i = pos/PAGE_SIZE;
  221                        page = bprm->page[i];
  222                        new = 0;
  223                        if (!page) {
  224                                page = alloc_page(GFP_HIGHUSER);
  225                                bprm->page[i] = page;
  226                                if (!page) {
  227                                        ret = -ENOMEM;
  228                                        goto out;
  229                                }
  230                                new = 1;
  231                        }
  232
                             /* Keep at most one page mapped; remap only when the page changes. */
  233                        if (page != kmapped_page) {
  234                                if (kmapped_page)
  235                                        kunmap(kmapped_page);
  236                                kmapped_page = page;
  237                                kaddr = kmap(kmapped_page);
  238                        }
                             /* Fresh page: zero the bytes in front of the data ... */
  239                        if (new && offset)
  240                                memset(kaddr, 0, offset);
  241                        bytes_to_copy = PAGE_SIZE - offset;
  242                        if (bytes_to_copy > len) {
  243                                bytes_to_copy = len;
                                     /* ... and the bytes after it when the data ends inside the page. */
  244                                if (new)
  245                                        memset(kaddr+offset+len, 0,
  246                                                PAGE_SIZE-offset-len);
  247                        }
  248                        err = copy_from_user(kaddr+offset, str, bytes_to_copy);
  249                        if (err) {
  250                                ret = -EFAULT;
  251                                goto out;
  252                        }
  253
  254                        pos += bytes_to_copy;
  255                        str += bytes_to_copy;
  256                        len -= bytes_to_copy;
  257                }
  258        }
  259        ret = 0;
  260out:
             /* Release the mapping of the last page touched, on success and on error. */
  261        if (kmapped_page)
  262                kunmap(kmapped_page);
  263        return ret;
  264}
 265
 266/*
 267 * Like copy_strings, but get argv and its values from kernel memory.
 268 */
 269int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
 270{
 271        int r;
 272        mm_segment_t oldfs = get_fs();
 273        set_fs(KERNEL_DS); 
 274        r = copy_strings(argc, argv, bprm);
 275        set_fs(oldfs);
 276        return r; 
 277}
 278
 279/*
 280 * This routine is used to map in a page into an address space: needed by
 281 * execve() for the initial stack and environment pages.
 282 *
 283 * tsk->mmap_sem is held for writing.
 284 */
 285void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
 286{
 287        pgd_t * pgd;
 288        pmd_t * pmd;
 289        pte_t * pte;
 290
 291        if (page_count(page) != 1)
 292                printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
 293        pgd = pgd_offset(tsk->mm, address);
 294
 295        spin_lock(&tsk->mm->page_table_lock);
 296        pmd = pmd_alloc(tsk->mm, pgd, address);
 297        if (!pmd)
 298                goto out;
 299        pte = pte_alloc_map(tsk->mm, pmd, address);
 300        if (!pte)
 301                goto out;
 302        if (!pte_none(*pte)) {
 303                pte_unmap(pte);
 304                goto out;
 305        }
 306        lru_cache_add(page);
 307        flush_dcache_page(page);
 308        flush_page_to_ram(page);
 309        set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
 310        page_add_rmap(page, pte);
 311        pte_unmap(pte);
 312        tsk->mm->rss++;
 313        spin_unlock(&tsk->mm->page_table_lock);
 314
 315        /* no need for flush_tlb */
 316        return;
 317out:
 318        spin_unlock(&tsk->mm->page_table_lock);
 319        __free_page(page);
 320        force_sig(SIGKILL, tsk);
 321        return;
 322}
 323
 324int setup_arg_pages(struct linux_binprm *bprm)
 325{
 326        unsigned long stack_base;
 327        struct vm_area_struct *mpnt;
 328        struct mm_struct *mm = current->mm;
 329        int i;
 330
 331#ifdef ARCH_STACK_GROWSUP
 332        /* Move the argument and environment strings to the bottom of the
 333         * stack space.
 334         */
 335        int offset, j;
 336        char *to, *from;
 337
 338        /* Start by shifting all the pages down */
 339        i = 0;
 340        for (j = 0; j < MAX_ARG_PAGES; j++) {
 341                struct page *page = bprm->page[j];
 342                if (!page)
 343                        continue;
 344                bprm->page[i++] = page;
 345        }
 346
 347        /* Now move them within their pages */
 348        offset = bprm->p % PAGE_SIZE;
 349        to = kmap(bprm->page[0]);
 350        for (j = 1; j < i; j++) {
 351                memmove(to, to + offset, PAGE_SIZE - offset);
 352                from = kmap(bprm->page[j]);
 353                memcpy(to + PAGE_SIZE - offset, from, offset);
 354                kunmap(bprm[j - 1]);
 355                to = from;
 356        }
 357        memmove(to, to + offset, PAGE_SIZE - offset);
 358        kunmap(bprm[j - 1]);
 359
 360        /* Adjust bprm->p to point to the end of the strings. */
 361        bprm->p = PAGE_SIZE * i - offset;
 362        stack_base = STACK_TOP - current->rlim[RLIMIT_STACK].rlim_max;
 363        mm->arg_start = stack_base;
 364
 365        /* zero pages that were copied above */
 366        while (i < MAX_ARG_PAGES)
 367                bprm->page[i++] = NULL;
 368#else
 369        stack_base = STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE;
 370        mm->arg_start = bprm->p + stack_base;
 371#endif
 372
 373        bprm->p += stack_base;
 374        if (bprm->loader)
 375                bprm->loader += stack_base;
 376        bprm->exec += stack_base;
 377
 378        mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 379        if (!mpnt)
 380                return -ENOMEM;
 381
 382        if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
 383                kmem_cache_free(vm_area_cachep, mpnt);
 384                return -ENOMEM;
 385        }
 386
 387        down_write(&mm->mmap_sem);
 388        {
 389                mpnt->vm_mm = mm;
 390#ifdef ARCH_STACK_GROWSUP
 391                mpnt->vm_start = stack_base;
 392                mpnt->vm_end = PAGE_MASK &
 393                        (PAGE_SIZE - 1 + (unsigned long) bprm->p);
 394#else
 395                mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
 396                mpnt->vm_end = STACK_TOP;
 397#endif
 398                mpnt->vm_page_prot = PAGE_COPY;
 399                mpnt->vm_flags = VM_STACK_FLAGS;
 400                mpnt->vm_ops = NULL;
 401                mpnt->vm_pgoff = 0;
 402                mpnt->vm_file = NULL;
 403                mpnt->vm_private_data = (void *) 0;
 404                insert_vm_struct(mm, mpnt);
 405                mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 406        } 
 407
 408        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
 409                struct page *page = bprm->page[i];
 410                if (page) {
 411                        bprm->page[i] = NULL;
 412                        put_dirty_page(current,page,stack_base);
 413                }
 414                stack_base += PAGE_SIZE;
 415        }
 416        up_write(&mm->mmap_sem);
 417        
 418        return 0;
 419}
 420
 421struct file *open_exec(const char *name)
 422{
 423        struct nameidata nd;
 424        int err = path_lookup(name, LOOKUP_FOLLOW, &nd);
 425        struct file *file = ERR_PTR(err);
 426
 427        if (!err) {
 428                struct inode *inode = nd.dentry->d_inode;
 429                file = ERR_PTR(-EACCES);
 430                if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 431                    S_ISREG(inode->i_mode)) {
 432                        int err = permission(inode, MAY_EXEC);
 433                        if (!err && !(inode->i_mode & 0111))
 434                                err = -EACCES;
 435                        file = ERR_PTR(err);
 436                        if (!err) {
 437                                file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 438                                if (!IS_ERR(file)) {
 439                                        err = deny_write_access(file);
 440                                        if (err) {
 441                                                fput(file);
 442                                                file = ERR_PTR(err);
 443                                        }
 444                                }
 445out:
 446                                return file;
 447                        }
 448                }
 449                path_release(&nd);
 450        }
 451        goto out;
 452}
 453
 454int kernel_read(struct file *file, unsigned long offset,
 455        char * addr, unsigned long count)
 456{
 457        mm_segment_t old_fs;
 458        loff_t pos = offset;
 459        int result = -ENOSYS;
 460
 461        if (!file->f_op->read)
 462                goto fail;
 463        old_fs = get_fs();
 464        set_fs(get_ds());
 465        result = file->f_op->read(file, addr, count, &pos);
 466        set_fs(old_fs);
 467fail:
 468        return result;
 469}
 470
 471static int exec_mmap(struct mm_struct *mm)
 472{
 473        struct mm_struct * old_mm, *active_mm;
 474
 475        /* Add it to the list of mm's */
 476        spin_lock(&mmlist_lock);
 477        list_add(&mm->mmlist, &init_mm.mmlist);
 478        mmlist_nr++;
 479        spin_unlock(&mmlist_lock);
 480
 481        task_lock(current);
 482        old_mm = current->mm;
 483        active_mm = current->active_mm;
 484        current->mm = mm;
 485        current->active_mm = mm;
 486        activate_mm(active_mm, mm);
 487        task_unlock(current);
 488        mm_release();
 489        if (old_mm) {
 490                if (active_mm != old_mm) BUG();
 491                mmput(old_mm);
 492                return 0;
 493        }
 494        mmdrop(active_mm);
 495        return 0;
 496}
 497
 498static struct dentry *clean_proc_dentry(struct task_struct *p)
 499{
 500        struct dentry *proc_dentry = p->proc_dentry;
 501
 502        if (proc_dentry) {
 503                spin_lock(&dcache_lock);
 504                if (!list_empty(&proc_dentry->d_hash)) {
 505                        dget_locked(proc_dentry);
 506                        list_del_init(&proc_dentry->d_hash);
 507                } else
 508                        proc_dentry = NULL;
 509                spin_unlock(&dcache_lock);
 510        }
 511        return proc_dentry;
 512}
 513
 514static inline void put_proc_dentry(struct dentry *dentry)
 515{
 516        if (dentry) {
 517                shrink_dcache_parent(dentry);
 518                dput(dentry);
 519        }
 520}
 521
  522/*
  523 * This function makes sure the current process has its own signal table,
  524 * so that flush_signal_handlers can later reset the handlers without
  525 * disturbing other processes.  (Other processes might share the signal
  526 * table via the CLONE_SIGHAND option to clone().)
      *
      * If the caller is part of a thread group, all other threads are sent
      * SIGKILL and waited for, and a non-leader caller takes over the PID of
      * the group leader, which is then released.
      *
      * oldsig is the signal table the caller currently uses (flush_old_exec()
      * passes current->sig).
      *
      * Returns 0 on success (including when the table is not shared),
      * -ENOMEM if a new table cannot be allocated, and -EAGAIN if another
      * group exit is already in progress.
  527 */
  528static inline int de_thread(struct signal_struct *oldsig)
  529{
  530        struct signal_struct *newsig;
  531        int count;
  532
             /* Sole user of the signal table: nothing to do. */
  533        if (atomic_read(&current->sig->count) <= 1)
  534                return 0;
  535
  536        newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
  537        if (!newsig)
  538                return -ENOMEM;
  539
             /* Table is shared, but there are no thread-group siblings to kill. */
  540        if (list_empty(&current->thread_group))
  541                goto out;
  542        /*
  543         * Kill all other threads in the thread group:
  544         */
  545        spin_lock_irq(&oldsig->siglock);
  546        if (oldsig->group_exit) {
  547                /*
  548                 * Another group action in progress, just
  549                 * return so that the signal is processed.
  550                 */
  551                spin_unlock_irq(&oldsig->siglock);
  552                kmem_cache_free(sigact_cachep, newsig);
  553                return -EAGAIN;
  554        }
  555        oldsig->group_exit = 1;
  556        __broadcast_thread_group(current, SIGKILL);
  557
  558        /*
  559         * Account for the thread group leader hanging around:
  560         */
  561        count = 2;
  562        if (current->pid == current->tgid)
  563                count = 1;
             /*
              * Sleep until only 'count' users of the old table remain.
              * NOTE(review): the BUG() below implies whoever wakes us is
              * expected to have cleared group_exit_task - confirm in the
              * exit path.
              */
  564        while (atomic_read(&oldsig->count) > count) {
  565                oldsig->group_exit_task = current;
  566                current->state = TASK_UNINTERRUPTIBLE;
  567                spin_unlock_irq(&oldsig->siglock);
  568                schedule();
  569                spin_lock_irq(&oldsig->siglock);
  570                if (oldsig->group_exit_task)
  571                        BUG();
  572        }
  573        spin_unlock_irq(&oldsig->siglock);
  574
  575        /*
  576         * At this point all other threads have exited, all we have to
  577         * do is to wait for the thread group leader to become inactive,
  578         * and to assume its PID:
  579         */
  580        if (current->pid != current->tgid) {
  581                struct task_struct *leader = current->group_leader, *parent;
  582                struct dentry *proc_dentry1, *proc_dentry2;
  583                unsigned long state, ptrace;
  584
  585                /*
  586                 * Wait for the thread group leader to be a zombie.
  587                 * It should already be zombie at this point, most
  588                 * of the time.
  589                 */
  590                while (leader->state != TASK_ZOMBIE)
  591                        yield();
  592
  593                write_lock_irq(&tasklist_lock);
                     /* Unhash both /proc entries; they are released after the lock is dropped. */
  594                proc_dentry1 = clean_proc_dentry(current);
  595                proc_dentry2 = clean_proc_dentry(leader);
  596
  597                if (leader->tgid != current->tgid)
  598                        BUG();
  599                if (current->pid == current->tgid)
  600                        BUG();
  601                /*
  602                 * An exec() starts a new thread group with the
  603                 * TGID of the previous thread group. Rehash the
  604                 * two threads with a switched PID, and release
  605                 * the former thread group leader:
  606                 */
                     /* Remember the leader's ptrace state so it can be moved to current. */
  607                ptrace = leader->ptrace;
  608                parent = leader->parent;
  609
  610                ptrace_unlink(leader);
  611                ptrace_unlink(current);
  612                remove_parent(current);
  613                remove_parent(leader);
  614                /*
  615                 * Split up the last two remaining members of the
  616                 * thread group:
  617                 */
  618                list_del_init(&leader->thread_group);
  619
                     /* Swap PIDs: the leader takes our old PID, we take the TGID. */
  620                leader->pid = leader->tgid = current->pid;
  621                current->pid = current->tgid;
                     /* We inherit the leader's real parent; the leader goes to child_reaper. */
  622                current->parent = current->real_parent = leader->real_parent;
  623                leader->parent = leader->real_parent = child_reaper;
  624                current->exit_signal = SIGCHLD;
  625
  626                add_parent(current, current->parent);
  627                add_parent(leader, leader->parent);
  628                if (ptrace) {
  629                        current->ptrace = ptrace;
  630                        __ptrace_link(current, parent);
  631                }
  632                
  633                list_add_tail(&current->tasks, &init_task.tasks);
  634                state = leader->state;
  635                write_unlock_irq(&tasklist_lock);
  636
  637                if (state != TASK_ZOMBIE)
  638                        BUG();
  639                release_task(leader);
  640
  641                put_proc_dentry(proc_dentry1);
  642                put_proc_dentry(proc_dentry2);
  643        }
  644
  645out:
             /* Set up the private table as a copy of the current handlers. */
  646        spin_lock_init(&newsig->siglock);
  647        atomic_set(&newsig->count, 1);
  648        newsig->group_exit = 0;
  649        newsig->group_exit_code = 0;
  650        newsig->group_exit_task = NULL;
  651        memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
  652        init_sigpending(&newsig->shared_pending);
  653
  654        remove_thread_group(current, current->sig);
             /* Install the new table and reset our pending signals. */
  655        spin_lock_irq(&current->sigmask_lock);
  656        current->sig = newsig;
  657        init_sigpending(&current->pending);
  658        recalc_sigpending();
  659        spin_unlock_irq(&current->sigmask_lock);
  660
             /* Drop our reference on the old table; free it if we were the last user. */
  661        if (atomic_dec_and_test(&oldsig->count))
  662                kmem_cache_free(sigact_cachep, oldsig);
  663
  664        if (!list_empty(&current->thread_group))
  665                BUG();
  666        if (current->tgid != current->pid)
  667                BUG();
  668        return 0;
  669}
 670        
 671/*
  672 * These functions flush out all traces of the currently running executable
  673 * so that a new one can be started.
 674 */
 675
 676static inline void flush_old_files(struct files_struct * files)
 677{
 678        long j = -1;
 679
 680        write_lock(&files->file_lock);
 681        for (;;) {
 682                unsigned long set, i;
 683
 684                j++;
 685                i = j * __NFDBITS;
 686                if (i >= files->max_fds || i >= files->max_fdset)
 687                        break;
 688                set = files->close_on_exec->fds_bits[j];
 689                if (!set)
 690                        continue;
 691                files->close_on_exec->fds_bits[j] = 0;
 692                write_unlock(&files->file_lock);
 693                for ( ; set ; i++,set >>= 1) {
 694                        if (set & 1) {
 695                                sys_close(i);
 696                        }
 697                }
 698                write_lock(&files->file_lock);
 699
 700        }
 701        write_unlock(&files->file_lock);
 702}
 703
  /*
   * flush_old_exec() - discard the state of the currently running executable
   * so that the image described by bprm can take over.
   *
   * Switches to bprm->mm, gives the task a private signal table and detaches
   * it from its thread group, clears the alternate signal stack, sets
   * current->comm to the last path component of bprm->filename (at most 15
   * characters), calls flush_thread(), updates mm->dumpable, bumps
   * self_exec_id, resets the signal handlers and closes close-on-exec files.
   *
   * Returns 0 on success, or the error from exec_mmap() or de_thread().
   */
  704int flush_old_exec(struct linux_binprm * bprm)
  705{
  706        char * name;
  707        int i, ch, retval;
  708        struct signal_struct * oldsig = current->sig;
  709
  710        /* 
  711         * Release all of the old mmap stuff
  712         */
  713        retval = exec_mmap(bprm->mm);
  714        if (retval)
  715                goto mmap_failed;
  716        /*
  717         * Make sure we have a private signal table and that
  718         * we are unassociated from the previous thread group.
  719         */
  720        retval = de_thread(oldsig);
  721        if (retval)
  722                goto flush_failed;
  723
  724        /* This is the point of no return */
  725
             /* Forget any alternate signal stack. */
  726        current->sas_ss_sp = current->sas_ss_size = 0;
  727
  728        if (current->euid == current->uid && current->egid == current->gid)
  729                current->mm->dumpable = 1;
             /*
              * Copy the basename of the file into current->comm: i restarts
              * at 0 after every '/', and at most 15 characters are kept.
              */
  730        name = bprm->filename;
  731        for (i=0; (ch = *(name++)) != '\0';) {
  732                if (ch == '/')
  733                        i = 0;
  734                else
  735                        if (i < 15)
  736                                current->comm[i++] = ch;
  737        }
  738        current->comm[i] = '\0';
  739
  740        flush_thread();
  741
             /* Not dumpable if the IDs will change or the file is not readable by us. */
  742        if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
  743            permission(bprm->file->f_dentry->d_inode,MAY_READ))
  744                current->mm->dumpable = 0;
  745
  746        /* An exec changes our domain. We are no longer part of the thread
  747           group */
  748           
  749        current->self_exec_id++;
  750                        
  751        flush_signal_handlers(current);
  752        flush_old_files(current->files);
  753
  754        return 0;
  755
  756mmap_failed:
  757flush_failed:
             /*
              * If current->sig was replaced, free the replacement and go back
              * to the original table.
              * NOTE(review): de_thread() as visible in this file installs the
              * new table only on its success path, so this looks defensive -
              * confirm.
              */
  758        spin_lock_irq(&current->sigmask_lock);
  759        if (current->sig != oldsig) {
  760                kmem_cache_free(sigact_cachep, current->sig);
  761                current->sig = oldsig;
  762        }
  763        spin_unlock_irq(&current->sigmask_lock);
  764        return retval;
  765}
 766
 767/*
 768 * We mustn't allow tracing of suid binaries, unless
 769 * the tracer has the capability to trace anything..
 770 */
 771static inline int must_not_trace_exec(struct task_struct * p)
 772{
 773        return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
 774}
 775
 776/* 
 777 * Fill the binprm structure from the inode. 
 778 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
 779 */
 780int prepare_binprm(struct linux_binprm *bprm)
 781{
 782        int mode;
 783        struct inode * inode = bprm->file->f_dentry->d_inode;
 784        int retval;
 785
 786        mode = inode->i_mode;
 787        /*
 788         * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
 789         * vfs_permission lets a non-executable through
 790         */
 791        if (!(mode & 0111))     /* with at least _one_ execute bit set */
 792                return -EACCES;
 793        if (bprm->file->f_op == NULL)
 794                return -EACCES;
 795
 796        bprm->e_uid = current->euid;
 797        bprm->e_gid = current->egid;
 798
 799        if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
 800                /* Set-uid? */
 801                if (mode & S_ISUID)
 802                        bprm->e_uid = inode->i_uid;
 803
 804                /* Set-gid? */
 805                /*
 806                 * If setgid is set but no group execute bit then this
 807                 * is a candidate for mandatory locking, not a setgid
 808                 * executable.
 809                 */
 810                if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
 811                        bprm->e_gid = inode->i_gid;
 812        }
 813
 814        /* fill in binprm security blob */
 815        retval = security_ops->bprm_set_security(bprm);
 816        if (retval)
 817                return retval;
 818
 819        memset(bprm->buf,0,BINPRM_BUF_SIZE);
 820        return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
 821}
 822
 823/*
 824 * This function is used to produce the new IDs and capabilities
 825 * from the old ones and the file's capabilities.
 826 *
 827 * The formula used for evolving capabilities is:
 828 *
 829 *       pI' = pI
 830 * (***) pP' = (fP & X) | (fI & pI)
 831 *       pE' = pP' & fE          [NB. fE is 0 or ~0]
 832 *
 833 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
 834 * ' indicates post-exec(), and X is the global 'cap_bset'.
 835 *
 836 */
 837
 838void compute_creds(struct linux_binprm *bprm) 
 839{
 840        int do_unlock = 0;
 841
 842        if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) {
 843                current->mm->dumpable = 0;
 844                
 845                lock_kernel();
 846                if (must_not_trace_exec(current)
 847                    || atomic_read(&current->fs->count) > 1
 848                    || atomic_read(&current->files->count) > 1
 849                    || atomic_read(&current->sig->count) > 1) {
 850                        if(!capable(CAP_SETUID)) {
 851                                bprm->e_uid = current->uid;
 852                                bprm->e_gid = current->gid;
 853                        }
 854                }
 855                do_unlock = 1;
 856        }
 857
 858        current->suid = current->euid = current->fsuid = bprm->e_uid;
 859        current->sgid = current->egid = current->fsgid = bprm->e_gid;
 860
 861        if(do_unlock)
 862                unlock_kernel();
 863
 864        security_ops->bprm_compute_creds(bprm);
 865}
 866
  /*
   * remove_arg_zero() - drop the first argument string from the strings
   * stored in bprm->page[].
   *
   * bprm->p is advanced past the first NUL-terminated string, including its
   * terminating NUL, and bprm->argc is decremented.  Nothing happens when
   * bprm->argc is 0.
   */
  867void remove_arg_zero(struct linux_binprm *bprm)
  868{
  869        if (bprm->argc) {
  870                unsigned long offset;
  871                char * kaddr;
  872                struct page *page;
  873
  874                offset = bprm->p % PAGE_SIZE;
                     /* Enter the loop at the mapping step so a page is kmap()ed before the first read. */
  875                goto inside;
  876
                     /* Step over one byte per iteration; stop after consuming the NUL. */
  877                while (bprm->p++, *(kaddr+offset++)) {
  878                        if (offset != PAGE_SIZE)
  879                                continue;
                             /* Crossed a page boundary: switch the mapping to the next page. */
  880                        offset = 0;
  881                        kunmap(page);
  882inside:
  883                        page = bprm->page[bprm->p/PAGE_SIZE];
  884                        kaddr = kmap(page);
  885                }
  886                kunmap(page);
  887                bprm->argc--;
  888        }
  889}
 890
 891/*
  892 * Cycle through the list of binary format handlers until one recognizes the image.
 893 */
 894int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 895{
 896        int try,retval=0;
 897        struct linux_binfmt *fmt;
 898#ifdef __alpha__
 899        /* handle /sbin/loader.. */
 900        {
 901            struct exec * eh = (struct exec *) bprm->buf;
 902
 903            if (!bprm->loader && eh->fh.f_magic == 0x183 &&
 904                (eh->fh.f_flags & 0x3000) == 0x3000)
 905            {
 906                struct file * file;
 907                unsigned long loader;
 908
 909                allow_write_access(bprm->file);
 910                fput(bprm->file);
 911                bprm->file = NULL;
 912
 913                loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 914
 915                file = open_exec("/sbin/loader");
 916                retval = PTR_ERR(file);
 917                if (IS_ERR(file))
 918                        return retval;
 919
 920                /* Remember if the application is TASO.  */
 921                bprm->sh_bang = eh->ah.entry < 0x100000000;
 922
 923                bprm->file = file;
 924                bprm->loader = loader;
 925                retval = prepare_binprm(bprm);
 926                if (retval<0)
 927                        return retval;
 928                /* should call search_binary_handler recursively here,
 929                   but it does not matter */
 930            }
 931        }
 932#endif
 933        retval = security_ops->bprm_check_security(bprm);
 934        if (retval) 
 935                return retval;
 936
 937        /* kernel module loader fixup */
 938        /* so we don't try to load run modprobe in kernel space. */
 939        set_fs(USER_DS);
 940        for (try=0; try<2; try++) {
 941                read_lock(&binfmt_lock);
 942                for (fmt = formats ; fmt ; fmt = fmt->next) {
 943                        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
 944                        if (!fn)
 945                                continue;
 946                        if (!try_inc_mod_count(fmt->module))
 947                                continue;
 948                        read_unlock(&binfmt_lock);
 949                        retval = fn(bprm, regs);
 950                        if (retval >= 0) {
 951                                put_binfmt(fmt);
 952                                allow_write_access(bprm->file);
 953                                if (bprm->file)
 954                                        fput(bprm->file);
 955                                bprm->file = NULL;
 956                                current->did_exec = 1;
 957                                return retval;
 958                        }
 959                        read_lock(&binfmt_lock);
 960                        put_binfmt(fmt);
 961                        if (retval != -ENOEXEC)
 962                                break;
 963                        if (!bprm->file) {
 964                                read_unlock(&binfmt_lock);
 965                                return retval;
 966                        }
 967                }
 968                read_unlock(&binfmt_lock);
 969                if (retval != -ENOEXEC) {
 970                        break;
 971#ifdef CONFIG_KMOD
 972                }else{
 973#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
 974                        char modname[20];
 975                        if (printable(bprm->buf[0]) &&
 976                            printable(bprm->buf[1]) &&
 977                            printable(bprm->buf[2]) &&
 978                            printable(bprm->buf[3]))
 979                                break; /* -ENOEXEC */
 980                        sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
 981                        request_module(modname);
 982#endif
 983                }
 984        }
 985        return retval;
 986}
 987
 988/*
 989 * sys_execve() executes a new program.
 990 */
 991int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
 992{
 993        struct linux_binprm bprm;
 994        struct file *file;
 995        int retval;
 996        int i;
 997
 998        file = open_exec(filename);
 999
1000        retval = PTR_ERR(file);
1001        if (IS_ERR(file))
1002                return retval;
1003
1004        bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
1005        memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 
1006
1007        bprm.file = file;
1008        bprm.filename = filename;
1009        bprm.sh_bang = 0;
1010        bprm.loader = 0;
1011        bprm.exec = 0;
1012        bprm.security = NULL;
1013        bprm.mm = mm_alloc();
1014        retval = -ENOMEM;
1015        if (!bprm.mm)
1016                goto out_file;
1017
1018        retval = init_new_context(current, bprm.mm);
1019        if (retval < 0)
1020                goto out_mm;
1021
1022        bprm.argc = count(argv, bprm.p / sizeof(void *));
1023        if ((retval = bprm.argc) < 0)
1024                goto out_mm;
1025
1026        bprm.envc = count(envp, bprm.p / sizeof(void *));
1027        if ((retval = bprm.envc) < 0)
1028                goto out_mm;
1029
1030        retval = security_ops->bprm_alloc_security(&bprm);
1031        if (retval) 
1032                goto out;
1033
1034        retval = prepare_binprm(&bprm);
1035        if (retval < 0) 
1036                goto out; 
1037
1038        retval = copy_strings_kernel(1, &bprm.filename, &bprm);
1039        if (retval < 0) 
1040                goto out; 
1041
1042        bprm.exec = bprm.p;
1043        retval = copy_strings(bprm.envc, envp, &bprm);
1044        if (retval < 0) 
1045                goto out; 
1046
1047        retval = copy_strings(bprm.argc, argv, &bprm);
1048        if (retval < 0) 
1049                goto out; 
1050
1051        retval = search_binary_handler(&bprm,regs);
1052        if (retval >= 0) {
1053                /* execve success */
1054                security_ops->bprm_free_security(&bprm);
1055                return retval;
1056        }
1057
1058out:
1059        /* Something went wrong, return the inode and free the argument pages*/
1060        for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
1061                struct page * page = bprm.page[i];
1062                if (page)
1063                        __free_page(page);
1064        }
1065
1066        if (bprm.security)
1067                security_ops->bprm_free_security(&bprm);
1068
1069out_mm:
1070        mmdrop(bprm.mm);
1071
1072out_file:
1073        if (bprm.file) {
1074                allow_write_access(bprm.file);
1075                fput(bprm.file);
1076        }
1077        return retval;
1078}
1079
1080void set_binfmt(struct linux_binfmt *new)
1081{
1082        struct linux_binfmt *old = current->binfmt;
1083        if (new && new->module)
1084                __MOD_INC_USE_COUNT(new->module);
1085        current->binfmt = new;
1086        if (old && old->module)
1087                __MOD_DEC_USE_COUNT(old->module);
1088}
1089
1090int do_coredump(long signr, struct pt_regs * regs)
1091{
1092        struct linux_binfmt * binfmt;
1093        char corename[6+sizeof(current->comm)+10];
1094        struct file * file;
1095        struct inode * inode;
1096        int retval = 0;
1097
1098        lock_kernel();
1099        binfmt = current->binfmt;
1100        if (!binfmt || !binfmt->core_dump)
1101                goto fail;
1102        if (!current->mm->dumpable)
1103                goto fail;
1104        current->mm->dumpable = 0;
1105        if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
1106                goto fail;
1107
1108        memcpy(corename,"core", 5); /* include trailing \0 */
1109        if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)
1110                sprintf(&corename[4], ".%d", current->pid);
1111        file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
1112        if (IS_ERR(file))
1113                goto fail;
1114        inode = file->f_dentry->d_inode;
1115        if (inode->i_nlink > 1)
1116                goto close_fail;        /* multiple links - don't dump */
1117        if (d_unhashed(file->f_dentry))
1118                goto close_fail;
1119
1120        if (!S_ISREG(inode->i_mode))
1121                goto close_fail;
1122        if (!file->f_op)
1123                goto close_fail;
1124        if (!file->f_op->write)
1125                goto close_fail;
1126        if (do_truncate(file->f_dentry, 0) != 0)
1127                goto close_fail;
1128
1129        retval = binfmt->core_dump(signr, regs, file);
1130
1131close_fail:
1132        filp_close(file, NULL);
1133fail:
1134        unlock_kernel();
1135        return retval;
1136}
1137
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.