linux-bk/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/smp_lock.h>
  35#include <linux/compiler.h>
  36#include <linux/highmem.h>
  37#include <linux/pagemap.h>
  38#include <linux/security.h>
  39#include <linux/syscalls.h>
  40
  41#include <asm/uaccess.h>
  42#include <asm/param.h>
  43#include <asm/page.h>
  44
  45#include <linux/elf.h>
  46
  /* Forward declarations for the handlers wired into elf_format below. */
  47static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
  48static int load_elf_library(struct file*);
  49static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
  50extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  51
  /* Fallback: unless something earlier defined elf_addr_t, use unsigned long. */
  52#ifndef elf_addr_t
  53#define elf_addr_t unsigned long
  54#endif
  55
  56/*
  57 * If we don't support core dumping, then supply a NULL so we
  58 * don't even try.
  59 */
  60#ifdef USE_ELF_CORE_DUMP
  61static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
  62#else
  63#define elf_core_dump   NULL
  64#endif
  65
  /*
   * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
   * ELF_PAGE* helpers below use it as their rounding granule.
   */
  66#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  67# define ELF_MIN_ALIGN  ELF_EXEC_PAGESIZE
  68#else
  69# define ELF_MIN_ALIGN  PAGE_SIZE
  70#endif
  71
  /* Fallback of 0 when ELF_CORE_EFLAGS has not been defined before this point. */
  72#ifndef ELF_CORE_EFLAGS
  73#define ELF_CORE_EFLAGS 0
  74#endif
  75
  /*
   * Rounding helpers in units of ELF_MIN_ALIGN (the masks only make sense
   * when ELF_MIN_ALIGN is a power of two):
   *   ELF_PAGESTART  - round an address down to an ELF_MIN_ALIGN boundary
   *   ELF_PAGEOFFSET - offset of an address within its ELF_MIN_ALIGN granule
   *   ELF_PAGEALIGN  - round an address up to an ELF_MIN_ALIGN boundary
   *
   * Both rounding masks are widened to unsigned long before the AND so that
   * a 32-bit unsigned ELF_MIN_ALIGN cannot clear the upper bits of a 64-bit
   * address.
   */
  #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
  79
  /*
   * Binary-format descriptor for ELF.  It points at this file's executable
   * and shared-library loaders and at elf_core_dump, which is defined as
   * NULL above when USE_ELF_CORE_DUMP is not set.  Where the descriptor is
   * registered is not visible in this part of the file.
   */
  80static struct linux_binfmt elf_format = {
  81                .module         = THIS_MODULE,
  82                .load_binary    = load_elf_binary,
  83                .load_shlib     = load_elf_library,
  84                .core_dump      = elf_core_dump,
  85                .min_coredump   = ELF_EXEC_PAGESIZE
  86};
  87
  /*
   * True when x cannot be a valid user address.  Callers in this file apply
   * it to the return values of do_brk()/elf_map(), where a failure comes
   * back as a negative errno converted to unsigned long.  TASK_SIZE itself
   * is the first address past the user range, so it is rejected as well.
   */
  #define BAD_ADDR(x)     ((unsigned long)(x) >= TASK_SIZE)
  89
  90static int set_brk(unsigned long start, unsigned long end)
  91{
  92        start = ELF_PAGEALIGN(start);
  93        end = ELF_PAGEALIGN(end);
  94        if (end > start) {
  95                unsigned long addr;
  96                down_write(&current->mm->mmap_sem);
  97                addr = do_brk(start, end - start);
  98                up_write(&current->mm->mmap_sem);
  99                if (BAD_ADDR(addr))
 100                        return addr;
 101        }
 102        current->mm->start_brk = current->mm->brk = end;
 103        return 0;
 104}
 105
 106
 107/* We need to explicitly zero any fractional pages
 108   after the data section (i.e. bss).  This would
 109   contain the junk from the file that should not
 110   be in memory */
 111
 112
 113static int padzero(unsigned long elf_bss)
 114{
 115        unsigned long nbyte;
 116
 117        nbyte = ELF_PAGEOFFSET(elf_bss);
 118        if (nbyte) {
 119                nbyte = ELF_MIN_ALIGN - nbyte;
 120                if (clear_user((void __user *) elf_bss, nbyte))
 121                        return -EFAULT;
 122        }
 123        return 0;
 124}
 125
 /*
  * Let's use some macros to make this stack manipulation a little clearer.
  *
  *   STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further in
  *                            the direction the stack grows
  *   STACK_ROUND(sp, items) - like STACK_ADD, then aligned to 16 bytes
  *   STACK_ALLOC(sp, len)   - move sp by len and yield the address of the
  *                            newly reserved area (modifies sp)
  *
  * Every macro argument is parenthesized so that compound expressions such
  * as "a + b" expand with the intended precedence.
  */
 #ifdef CONFIG_STACK_GROWSUP
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 #define STACK_ROUND(sp, items) \
         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); old_sp; })
 #else
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 #define STACK_ROUND(sp, items) \
         (((unsigned long) ((sp) - (items))) &~ 15UL)
 #define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
 #endif
 138
 139static int
 140create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 141                int interp_aout, unsigned long load_addr,
 142                unsigned long interp_load_addr)
 143{
 144        unsigned long p = bprm->p;
 145        int argc = bprm->argc;
 146        int envc = bprm->envc;
 147        elf_addr_t __user *argv;
 148        elf_addr_t __user *envp;
 149        elf_addr_t __user *sp;
 150        elf_addr_t __user *u_platform;
 151        const char *k_platform = ELF_PLATFORM;
 152        int items;
 153        elf_addr_t *elf_info;
 154        int ei_index = 0;
 155        struct task_struct *tsk = current;
 156
 157        /*
 158         * If this architecture has a platform capability string, copy it
 159         * to userspace.  In some cases (Sparc), this info is impossible
 160         * for userspace to get any other way, in others (i386) it is
 161         * merely difficult.
 162         */
 163
 164        u_platform = NULL;
 165        if (k_platform) {
 166                size_t len = strlen(k_platform) + 1;
 167
 168#ifdef CONFIG_X86_HT
 169                /*
 170                 * In some cases (e.g. Hyper-Threading), we want to avoid L1
 171                 * evictions by the processes running on the same package. One
 172                 * thing we can do is to shuffle the initial stack for them.
 173                 *
 174                 * The conditionals here are unneeded, but kept in to make the
 175                 * code behaviour the same as pre change unless we have
 176                 * hyperthreaded processors. This should be cleaned up
 177                 * before 2.6
 178                 */
 179         
 180                if (smp_num_siblings > 1)
 181                        STACK_ALLOC(p, ((current->pid % 64) << 7));
 182#endif
 183                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 184                if (__copy_to_user(u_platform, k_platform, len))
 185                        return -EFAULT;
 186        }
 187
 188        /* Create the ELF interpreter info */
 189        elf_info = (elf_addr_t *) current->mm->saved_auxv;
 190#define NEW_AUX_ENT(id, val) \
 191        do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
 192
 193#ifdef ARCH_DLINFO
 194        /* 
 195         * ARCH_DLINFO must come first so PPC can do its special alignment of
 196         * AUXV.
 197         */
 198        ARCH_DLINFO;
 199#endif
 200        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 201        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 202        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 203        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 204        NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
 205        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 206        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 207        NEW_AUX_ENT(AT_FLAGS, 0);
 208        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 209        NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
 210        NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
 211        NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
 212        NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
 213        NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
 214        if (k_platform) {
 215                NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
 216        }
 217        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 218                NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
 219        }
 220#undef NEW_AUX_ENT
 221        /* AT_NULL is zero; clear the rest too */
 222        memset(&elf_info[ei_index], 0,
 223               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 224
 225        /* And advance past the AT_NULL entry.  */
 226        ei_index += 2;
 227
 228        sp = STACK_ADD(p, ei_index);
 229
 230        items = (argc + 1) + (envc + 1);
 231        if (interp_aout) {
 232                items += 3; /* a.out interpreters require argv & envp too */
 233        } else {
 234                items += 1; /* ELF interpreters only put argc on the stack */
 235        }
 236        bprm->p = STACK_ROUND(sp, items);
 237
 238        /* Point sp at the lowest address on the stack */
 239#ifdef CONFIG_STACK_GROWSUP
 240        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 241        bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
 242#else
 243        sp = (elf_addr_t __user *)bprm->p;
 244#endif
 245
 246        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 247        if (__put_user(argc, sp++))
 248                return -EFAULT;
 249        if (interp_aout) {
 250                argv = sp + 2;
 251                envp = argv + argc + 1;
 252                __put_user((elf_addr_t)(unsigned long)argv, sp++);
 253                __put_user((elf_addr_t)(unsigned long)envp, sp++);
 254        } else {
 255                argv = sp;
 256                envp = argv + argc + 1;
 257        }
 258
 259        /* Populate argv and envp */
 260        p = current->mm->arg_start;
 261        while (argc-- > 0) {
 262                size_t len;
 263                __put_user((elf_addr_t)p, argv++);
 264                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 265                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 266                        return 0;
 267                p += len;
 268        }
 269        if (__put_user(0, argv))
 270                return -EFAULT;
 271        current->mm->arg_end = current->mm->env_start = p;
 272        while (envc-- > 0) {
 273                size_t len;
 274                __put_user((elf_addr_t)p, envp++);
 275                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 276                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 277                        return 0;
 278                p += len;
 279        }
 280        if (__put_user(0, envp))
 281                return -EFAULT;
 282        current->mm->env_end = p;
 283
 284        /* Put the elf_info on the stack in the right place.  */
 285        sp = (elf_addr_t __user *)envp + 1;
 286        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 287                return -EFAULT;
 288        return 0;
 289}
 290
 291#ifndef elf_map
 292
 293static unsigned long elf_map(struct file *filep, unsigned long addr,
 294                        struct elf_phdr *eppnt, int prot, int type)
 295{
 296        unsigned long map_addr;
 297
 298        down_write(&current->mm->mmap_sem);
 299        map_addr = do_mmap(filep, ELF_PAGESTART(addr),
 300                           eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
 301                           eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
 302        up_write(&current->mm->mmap_sem);
 303        return(map_addr);
 304}
 305
 306#endif /* !elf_map */
 307
 308/* This is much more generalized than the library routine read function,
 309   so we keep this separate.  Technically the library read function
 310   is only provided so that we can read a.out libraries that have
 311   an ELF header */
 312
 313static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 314                                     struct file * interpreter,
 315                                     unsigned long *interp_load_addr)
 316{
 317        struct elf_phdr *elf_phdata;
 318        struct elf_phdr *eppnt;
 319        unsigned long load_addr = 0;
 320        int load_addr_set = 0;
 321        unsigned long last_bss = 0, elf_bss = 0;
 322        unsigned long error = ~0UL;
 323        int retval, i, size;
 324
 325        /* First of all, some simple consistency checks */
 326        if (interp_elf_ex->e_type != ET_EXEC &&
 327            interp_elf_ex->e_type != ET_DYN)
 328                goto out;
 329        if (!elf_check_arch(interp_elf_ex))
 330                goto out;
 331        if (!interpreter->f_op || !interpreter->f_op->mmap)
 332                goto out;
 333
 334        /*
 335         * If the size of this structure has changed, then punt, since
 336         * we will be doing the wrong thing.
 337         */
 338        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 339                goto out;
 340        if (interp_elf_ex->e_phnum < 1 ||
 341                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 342                goto out;
 343
 344        /* Now read in all of the header information */
 345
 346        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 347        if (size > ELF_MIN_ALIGN)
 348                goto out;
 349        elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
 350        if (!elf_phdata)
 351                goto out;
 352
 353        retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
 354        error = -EIO;
 355        if (retval != size) {
 356                if (retval < 0)
 357                        error = retval; 
 358                goto out_close;
 359        }
 360
 361        eppnt = elf_phdata;
 362        for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
 363          if (eppnt->p_type == PT_LOAD) {
 364            int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 365            int elf_prot = 0;
 366            unsigned long vaddr = 0;
 367            unsigned long k, map_addr;
 368
 369            if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
 370            if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 371            if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 372            vaddr = eppnt->p_vaddr;
 373            if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 374                elf_type |= MAP_FIXED;
 375
 376            map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
 377            error = map_addr;
 378            if (BAD_ADDR(map_addr))
 379                goto out_close;
 380
 381            if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
 382                load_addr = map_addr - ELF_PAGESTART(vaddr);
 383                load_addr_set = 1;
 384            }
 385
 386            /*
 387             * Check to see if the section's size will overflow the
 388             * allowed task size. Note that p_filesz must always be
 389             * <= p_memsize so it is only necessary to check p_memsz.
 390             */
 391            k = load_addr + eppnt->p_vaddr;
 392            if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
 393                eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
 394                error = -ENOMEM;
 395                goto out_close;
 396            }
 397
 398            /*
 399             * Find the end of the file mapping for this phdr, and keep
 400             * track of the largest address we see for this.
 401             */
 402            k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 403            if (k > elf_bss)
 404                elf_bss = k;
 405
 406            /*
 407             * Do the same thing for the memory mapping - between
 408             * elf_bss and last_bss is the bss section.
 409             */
 410            k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 411            if (k > last_bss)
 412                last_bss = k;
 413          }
 414        }
 415
 416        /*
 417         * Now fill out the bss section.  First pad the last page up
 418         * to the page boundary, and then perform a mmap to make sure
 419         * that there are zero-mapped pages up to and including the 
 420         * last bss page.
 421         */
 422        if (padzero(elf_bss)) {
 423                error = -EFAULT;
 424                goto out_close;
 425        }
 426
 427        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);   /* What we have mapped so far */
 428
 429        /* Map the last of the bss segment */
 430        if (last_bss > elf_bss) {
 431                down_write(&current->mm->mmap_sem);
 432                error = do_brk(elf_bss, last_bss - elf_bss);
 433                up_write(&current->mm->mmap_sem);
 434                if (BAD_ADDR(error))
 435                        goto out_close;
 436        }
 437
 438        *interp_load_addr = load_addr;
 439        error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
 440
 441out_close:
 442        kfree(elf_phdata);
 443out:
 444        return error;
 445}
 446
 447static unsigned long load_aout_interp(struct exec * interp_ex,
 448                             struct file * interpreter)
 449{
 450        unsigned long text_data, elf_entry = ~0UL;
 451        char __user * addr;
 452        loff_t offset;
 453
 454        current->mm->end_code = interp_ex->a_text;
 455        text_data = interp_ex->a_text + interp_ex->a_data;
 456        current->mm->end_data = text_data;
 457        current->mm->brk = interp_ex->a_bss + text_data;
 458
 459        switch (N_MAGIC(*interp_ex)) {
 460        case OMAGIC:
 461                offset = 32;
 462                addr = (char __user *)0;
 463                break;
 464        case ZMAGIC:
 465        case QMAGIC:
 466                offset = N_TXTOFF(*interp_ex);
 467                addr = (char __user *) N_TXTADDR(*interp_ex);
 468                break;
 469        default:
 470                goto out;
 471        }
 472
 473        down_write(&current->mm->mmap_sem);     
 474        do_brk(0, text_data);
 475        up_write(&current->mm->mmap_sem);
 476        if (!interpreter->f_op || !interpreter->f_op->read)
 477                goto out;
 478        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 479                goto out;
 480        flush_icache_range((unsigned long)addr,
 481                           (unsigned long)addr + text_data);
 482
 483
 484        down_write(&current->mm->mmap_sem);     
 485        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 486                interp_ex->a_bss);
 487        up_write(&current->mm->mmap_sem);
 488        elf_entry = interp_ex->a_entry;
 489
 490out:
 491        return elf_entry;
 492}
 493
 494/*
 495 * These are the functions used to load ELF style executables and shared
 496 * libraries.  There is no binary dependent code anywhere else.
 497 */
 498
 /*
  * Format of the program interpreter, as detected by load_elf_binary().
  * AOUT and ELF are separate bits: the loader starts from
  * (INTERPRETER_ELF | INTERPRETER_AOUT) and masks out whichever format the
  * interpreter's header does not match.  NONE (0) means no usable format.
  */
 499#define INTERPRETER_NONE 0
 500#define INTERPRETER_AOUT 1
 501#define INTERPRETER_ELF 2
 502
 503
 504static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 505{
 506        struct file *interpreter = NULL; /* to shut gcc up */
 507        unsigned long load_addr = 0, load_bias = 0;
 508        int load_addr_set = 0;
 509        char * elf_interpreter = NULL;
 510        unsigned int interpreter_type = INTERPRETER_NONE;
 511        unsigned char ibcs2_interpreter = 0;
 512        unsigned long error;
 513        struct elf_phdr * elf_ppnt, *elf_phdata;
 514        unsigned long elf_bss, elf_brk;
 515        int elf_exec_fileno;
 516        int retval, i;
 517        unsigned int size;
 518        unsigned long elf_entry, interp_load_addr = 0;
 519        unsigned long start_code, end_code, start_data, end_data;
 520        unsigned long reloc_func_desc = 0;
 521        char passed_fileno[6];
 522        struct files_struct *files;
 523        int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
 524        unsigned long def_flags = 0;
 525        struct {
 526                struct elfhdr elf_ex;
 527                struct elfhdr interp_elf_ex;
 528                struct exec interp_ex;
 529        } *loc;
 530
 531        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 532        if (!loc) {
 533                retval = -ENOMEM;
 534                goto out_ret;
 535        }
 536        
 537        /* Get the exec-header */
 538        loc->elf_ex = *((struct elfhdr *) bprm->buf);
 539
 540        retval = -ENOEXEC;
 541        /* First of all, some simple consistency checks */
 542        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 543                goto out;
 544
 545        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 546                goto out;
 547        if (!elf_check_arch(&loc->elf_ex))
 548                goto out;
 549        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 550                goto out;
 551
 552        /* Now read in all of the header information */
 553
 554        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 555                goto out;
 556        if (loc->elf_ex.e_phnum < 1 ||
 557                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 558                goto out;
 559        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 560        retval = -ENOMEM;
 561        elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
 562        if (!elf_phdata)
 563                goto out;
 564
 565        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
 566        if (retval != size) {
 567                if (retval >= 0)
 568                        retval = -EIO;
 569                goto out_free_ph;
 570        }
 571
 572        files = current->files;         /* Refcounted so ok */
 573        retval = unshare_files();
 574        if (retval < 0)
 575                goto out_free_ph;
 576        if (files == current->files) {
 577                put_files_struct(files);
 578                files = NULL;
 579        }
 580
 581        /* exec will make our files private anyway, but for the a.out
 582           loader stuff we need to do it earlier */
 583
 584        retval = get_unused_fd();
 585        if (retval < 0)
 586                goto out_free_fh;
 587        get_file(bprm->file);
 588        fd_install(elf_exec_fileno = retval, bprm->file);
 589
 590        elf_ppnt = elf_phdata;
 591        elf_bss = 0;
 592        elf_brk = 0;
 593
 594        start_code = ~0UL;
 595        end_code = 0;
 596        start_data = 0;
 597        end_data = 0;
 598
 599        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 600                if (elf_ppnt->p_type == PT_INTERP) {
 601                        /* This is the program interpreter used for
 602                         * shared libraries - for now assume that this
 603                         * is an a.out format binary
 604                         */
 605
 606                        retval = -ENOEXEC;
 607                        if (elf_ppnt->p_filesz > PATH_MAX || 
 608                            elf_ppnt->p_filesz < 2)
 609                                goto out_free_file;
 610
 611                        retval = -ENOMEM;
 612                        elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
 613                                                           GFP_KERNEL);
 614                        if (!elf_interpreter)
 615                                goto out_free_file;
 616
 617                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 618                                           elf_interpreter,
 619                                           elf_ppnt->p_filesz);
 620                        if (retval != elf_ppnt->p_filesz) {
 621                                if (retval >= 0)
 622                                        retval = -EIO;
 623                                goto out_free_interp;
 624                        }
 625                        /* make sure path is NULL terminated */
 626                        retval = -ENOEXEC;
 627                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 628                                goto out_free_interp;
 629
 630                        /* If the program interpreter is one of these two,
 631                         * then assume an iBCS2 image. Otherwise assume
 632                         * a native linux image.
 633                         */
 634                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 635                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 636                                ibcs2_interpreter = 1;
 637
 638                        /*
 639                         * The early SET_PERSONALITY here is so that the lookup
 640                         * for the interpreter happens in the namespace of the 
 641                         * to-be-execed image.  SET_PERSONALITY can select an
 642                         * alternate root.
 643                         *
 644                         * However, SET_PERSONALITY is NOT allowed to switch
 645                         * this task into the new images's memory mapping
 646                         * policy - that is, TASK_SIZE must still evaluate to
 647                         * that which is appropriate to the execing application.
 648                         * This is because exit_mmap() needs to have TASK_SIZE
 649                         * evaluate to the size of the old image.
 650                         *
 651                         * So if (say) a 64-bit application is execing a 32-bit
 652                         * application it is the architecture's responsibility
 653                         * to defer changing the value of TASK_SIZE until the
 654                         * switch really is going to happen - do this in
 655                         * flush_thread().      - akpm
 656                         */
 657                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 658
 659                        interpreter = open_exec(elf_interpreter);
 660                        retval = PTR_ERR(interpreter);
 661                        if (IS_ERR(interpreter))
 662                                goto out_free_interp;
 663                        retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
 664                        if (retval != BINPRM_BUF_SIZE) {
 665                                if (retval >= 0)
 666                                        retval = -EIO;
 667                                goto out_free_dentry;
 668                        }
 669
 670                        /* Get the exec headers */
 671                        loc->interp_ex = *((struct exec *) bprm->buf);
 672                        loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
 673                        break;
 674                }
 675                elf_ppnt++;
 676        }
 677
 678        elf_ppnt = elf_phdata;
 679        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 680                if (elf_ppnt->p_type == PT_GNU_STACK) {
 681                        if (elf_ppnt->p_flags & PF_X)
 682                                executable_stack = EXSTACK_ENABLE_X;
 683                        else
 684                                executable_stack = EXSTACK_DISABLE_X;
 685                        break;
 686                }
 687        have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 688
 689        /* Some simple consistency checks for the interpreter */
 690        if (elf_interpreter) {
 691                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 692
 693                /* Now figure out which format our binary is */
 694                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 695                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 696                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 697                        interpreter_type = INTERPRETER_ELF;
 698
 699                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 700                        interpreter_type &= ~INTERPRETER_ELF;
 701
 702                retval = -ELIBBAD;
 703                if (!interpreter_type)
 704                        goto out_free_dentry;
 705
 706                /* Make sure only one type was selected */
 707                if ((interpreter_type & INTERPRETER_ELF) &&
 708                     interpreter_type != INTERPRETER_ELF) {
 709                        // FIXME - ratelimit this before re-enabling
 710                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 711                        interpreter_type = INTERPRETER_ELF;
 712                }
 713                /* Verify the interpreter has a valid arch */
 714                if ((interpreter_type == INTERPRETER_ELF) &&
 715                    !elf_check_arch(&loc->interp_elf_ex))
 716                        goto out_free_dentry;
 717        } else {
 718                /* Executables without an interpreter also need a personality  */
 719                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 720        }
 721
 722        /* OK, we are done with that, now set up the arg stuff,
 723           and then start this sucker up */
 724
 725        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 726                char *passed_p = passed_fileno;
 727                sprintf(passed_fileno, "%d", elf_exec_fileno);
 728
 729                if (elf_interpreter) {
 730                        retval = copy_strings_kernel(1, &passed_p, bprm);
 731                        if (retval)
 732                                goto out_free_dentry; 
 733                        bprm->argc++;
 734                }
 735        }
 736
 737        /* Flush all traces of the currently running executable */
 738        retval = flush_old_exec(bprm);
 739        if (retval)
 740                goto out_free_dentry;
 741
 742        /* Discard our unneeded old files struct */
 743        if (files) {
 744                steal_locks(files);
 745                put_files_struct(files);
 746                files = NULL;
 747        }
 748
 749        /* OK, This is the point of no return */
 750        current->mm->start_data = 0;
 751        current->mm->end_data = 0;
 752        current->mm->end_code = 0;
 753        current->mm->mmap = NULL;
 754        current->flags &= ~PF_FORKNOEXEC;
 755        current->mm->def_flags = def_flags;
 756
 757        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 758           may depend on the personality.  */
 759        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 760        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 761                current->personality |= READ_IMPLIES_EXEC;
 762
 763        arch_pick_mmap_layout(current->mm);
 764
 765        /* Do this so that we can load the interpreter, if need be.  We will
 766           change some of these later */
 767        current->mm->rss = 0;
 768        current->mm->free_area_cache = current->mm->mmap_base;
 769        retval = setup_arg_pages(bprm, STACK_TOP, executable_stack);
 770        if (retval < 0) {
 771                send_sig(SIGKILL, current, 0);
 772                goto out_free_dentry;
 773        }
 774        
 775        current->mm->start_stack = bprm->p;
 776
 777        /* Now we do a little grungy work by mmaping the ELF image into
 778           the correct location in memory.  At this point, we assume that
 779           the image should be loaded at fixed address, not at a variable
 780           address. */
 781
 782        for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 783                int elf_prot = 0, elf_flags;
 784                unsigned long k, vaddr;
 785
 786                if (elf_ppnt->p_type != PT_LOAD)
 787                        continue;
 788
 789                if (unlikely (elf_brk > elf_bss)) {
 790                        unsigned long nbyte;
 791                    
 792                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 793                           before this one. Map anonymous pages, if needed,
 794                           and clear the area.  */
 795                        retval = set_brk (elf_bss + load_bias,
 796                                          elf_brk + load_bias);
 797                        if (retval) {
 798                                send_sig(SIGKILL, current, 0);
 799                                goto out_free_dentry;
 800                        }
 801                        nbyte = ELF_PAGEOFFSET(elf_bss);
 802                        if (nbyte) {
 803                                nbyte = ELF_MIN_ALIGN - nbyte;
 804                                if (nbyte > elf_brk - elf_bss)
 805                                        nbyte = elf_brk - elf_bss;
 806                                if (clear_user((void __user *)elf_bss +
 807                                                        load_bias, nbyte)) {
 808                                        /*
 809                                         * This bss-zeroing can fail if the ELF
 810                                         * file specifies odd protections.  So
 811                                         * we don't check the return value
 812                                         */
 813                                }
 814                        }
 815                }
 816
 817                if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
 818                if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 819                if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 820
 821                elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
 822
 823                vaddr = elf_ppnt->p_vaddr;
 824                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 825                        elf_flags |= MAP_FIXED;
 826                } else if (loc->elf_ex.e_type == ET_DYN) {
 827                        /* Try and get dynamic programs out of the way of the default mmap
 828                           base, as well as whatever program they might try to exec.  This
 829                           is because the brk will follow the loader, and is not movable.  */
 830                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 831                }
 832
 833                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
 834                if (BAD_ADDR(error)) {
 835                        send_sig(SIGKILL, current, 0);
 836                        goto out_free_dentry;
 837                }
 838
 839                if (!load_addr_set) {
 840                        load_addr_set = 1;
 841                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 842                        if (loc->elf_ex.e_type == ET_DYN) {
 843                                load_bias += error -
 844                                             ELF_PAGESTART(load_bias + vaddr);
 845                                load_addr += load_bias;
 846                                reloc_func_desc = load_bias;
 847                        }
 848                }
 849                k = elf_ppnt->p_vaddr;
 850                if (k < start_code) start_code = k;
 851                if (start_data < k) start_data = k;
 852
 853                /*
 854                 * Check to see if the section's size will overflow the
 855                 * allowed task size. Note that p_filesz must always be
 856                 * <= p_memsz so it is only necessary to check p_memsz.
 857                 */
 858                if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 859                    elf_ppnt->p_memsz > TASK_SIZE ||
 860                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 861                        /* set_brk can never work.  Avoid overflows.  */
 862                        send_sig(SIGKILL, current, 0);
 863                        goto out_free_dentry;
 864                }
 865
 866                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 867
 868                if (k > elf_bss)
 869                        elf_bss = k;
 870                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 871                        end_code = k;
 872                if (end_data < k)
 873                        end_data = k;
 874                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 875                if (k > elf_brk)
 876                        elf_brk = k;
 877        }
 878
 879        loc->elf_ex.e_entry += load_bias;
 880        elf_bss += load_bias;
 881        elf_brk += load_bias;
 882        start_code += load_bias;
 883        end_code += load_bias;
 884        start_data += load_bias;
 885        end_data += load_bias;
 886
 887        /* Calling set_brk effectively mmaps the pages that we need
 888         * for the bss and break sections.  We must do this before
 889         * mapping in the interpreter, to make sure it doesn't wind
 890         * up getting placed where the bss needs to go.
 891         */
 892        retval = set_brk(elf_bss, elf_brk);
 893        if (retval) {
 894                send_sig(SIGKILL, current, 0);
 895                goto out_free_dentry;
 896        }
 897        if (padzero(elf_bss)) {
 898                send_sig(SIGSEGV, current, 0);
 899                retval = -EFAULT; /* Nobody gets to see this, but.. */
 900                goto out_free_dentry;
 901        }
 902
 903        if (elf_interpreter) {
 904                if (interpreter_type == INTERPRETER_AOUT)
 905                        elf_entry = load_aout_interp(&loc->interp_ex,
 906                                                     interpreter);
 907                else
 908                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
 909                                                    interpreter,
 910                                                    &interp_load_addr);
 911                if (BAD_ADDR(elf_entry)) {
 912                        printk(KERN_ERR "Unable to load interpreter %.128s\n",
 913                                elf_interpreter);
 914                        force_sig(SIGSEGV, current);
 915                        retval = -ENOEXEC; /* Nobody gets to see this, but.. */
 916                        goto out_free_dentry;
 917                }
 918                reloc_func_desc = interp_load_addr;
 919
 920                allow_write_access(interpreter);
 921                fput(interpreter);
 922                kfree(elf_interpreter);
 923        } else {
 924                elf_entry = loc->elf_ex.e_entry;
 925        }
 926
 927        kfree(elf_phdata);
 928
 929        if (interpreter_type != INTERPRETER_AOUT)
 930                sys_close(elf_exec_fileno);
 931
 932        set_binfmt(&elf_format);
 933
 934        compute_creds(bprm);
 935        current->flags &= ~PF_FORKNOEXEC;
 936        create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
 937                        load_addr, interp_load_addr);
 938        /* N.B. passed_fileno might not be initialized? */
 939        if (interpreter_type == INTERPRETER_AOUT)
 940                current->mm->arg_start += strlen(passed_fileno) + 1;
 941        current->mm->end_code = end_code;
 942        current->mm->start_code = start_code;
 943        current->mm->start_data = start_data;
 944        current->mm->end_data = end_data;
 945        current->mm->start_stack = bprm->p;
 946
 947        if (current->personality & MMAP_PAGE_ZERO) {
 948                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 949                   and some applications "depend" upon this behavior.
 950                   Since we do not have the power to recompile these, we
 951                   emulate the SVr4 behavior.  Sigh.  */
 952                down_write(&current->mm->mmap_sem);
 953                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 954                                MAP_FIXED | MAP_PRIVATE, 0);
 955                up_write(&current->mm->mmap_sem);
 956        }
 957
 958#ifdef ELF_PLAT_INIT
 959        /*
 960         * The ABI may specify that certain registers be set up in special
 961         * ways (on i386 %edx is the address of a DT_FINI function, for
 962         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
 963         * that the e_entry field is the address of the function descriptor
 964         * for the startup routine, rather than the address of the startup
 965         * routine itself.  This macro performs whatever initialization to
 966         * the regs structure is required as well as any relocations to the
 967         * function descriptor entries when executing dynamically links apps.
 968         */
 969        ELF_PLAT_INIT(regs, reloc_func_desc);
 970#endif
 971
 972        start_thread(regs, elf_entry, bprm->p);
 973        if (unlikely(current->ptrace & PT_PTRACED)) {
 974                if (current->ptrace & PT_TRACE_EXEC)
 975                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
 976                else
 977                        send_sig(SIGTRAP, current, 0);
 978        }
 979        retval = 0;
 980out:
 981        kfree(loc);
 982out_ret:
 983        return retval;
 984
 985        /* error cleanup */
 986out_free_dentry:
 987        allow_write_access(interpreter);
 988        if (interpreter)
 989                fput(interpreter);
 990out_free_interp:
 991        if (elf_interpreter)
 992                kfree(elf_interpreter);
 993out_free_file:
 994        sys_close(elf_exec_fileno);
 995out_free_fh:
 996        if (files) {
 997                put_files_struct(current->files);
 998                current->files = files;
 999        }
1000out_free_ph:
1001        kfree(elf_phdata);
1002        goto out;
1003}
1004
1005/* This is really simpleminded and specialized - we are loading an
1006   a.out library that is given an ELF header. */
1007
1008static int load_elf_library(struct file *file)
1009{
1010        struct elf_phdr *elf_phdata;
1011        unsigned long elf_bss, bss, len;
1012        int retval, error, i, j;
1013        struct elfhdr elf_ex;
1014
1015        error = -ENOEXEC;
1016        retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
1017        if (retval != sizeof(elf_ex))
1018                goto out;
1019
1020        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1021                goto out;
1022
1023        /* First of all, some simple consistency checks */
1024        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1025           !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1026                goto out;
1027
1028        /* Now read in all of the header information */
1029
1030        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1031        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1032
1033        error = -ENOMEM;
1034        elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);
1035        if (!elf_phdata)
1036                goto out;
1037
1038        error = -ENOEXEC;
1039        retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j);
1040        if (retval != j)
1041                goto out_free_ph;
1042
1043        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1044                if ((elf_phdata + i)->p_type == PT_LOAD) j++;
1045        if (j != 1)
1046                goto out_free_ph;
1047
1048        while (elf_phdata->p_type != PT_LOAD) elf_phdata++;
1049
1050        /* Now use mmap to map the library into memory. */
1051        down_write(&current->mm->mmap_sem);
1052        error = do_mmap(file,
1053                        ELF_PAGESTART(elf_phdata->p_vaddr),
1054                        (elf_phdata->p_filesz +
1055                         ELF_PAGEOFFSET(elf_phdata->p_vaddr)),
1056                        PROT_READ | PROT_WRITE | PROT_EXEC,
1057                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1058                        (elf_phdata->p_offset -
1059                         ELF_PAGEOFFSET(elf_phdata->p_vaddr)));
1060        up_write(&current->mm->mmap_sem);
1061        if (error != ELF_PAGESTART(elf_phdata->p_vaddr))
1062                goto out_free_ph;
1063
1064        elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz;
1065        if (padzero(elf_bss)) {
1066                error = -EFAULT;
1067                goto out_free_ph;
1068        }
1069
1070        len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
1071        bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
1072        if (bss > len) {
1073                down_write(&current->mm->mmap_sem);
1074                do_brk(len, bss - len);
1075                up_write(&current->mm->mmap_sem);
1076        }
1077        error = 0;
1078
1079out_free_ph:
1080        kfree(elf_phdata);
1081out:
1082        return error;
1083}
1084
1085/*
1086 * Note that some platforms still use traditional core dumps and not
1087 * the ELF core dump.  Each platform can select it as appropriate.
1088 */
1089#ifdef USE_ELF_CORE_DUMP
1090
1091/*
1092 * ELF core dumper
1093 *
1094 * Modelled on fs/exec.c:aout_core_dump()
1095 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1096 */
1097/*
1098 * These are the only things you should do on a core-file: use only these
1099 * functions to write out all the necessary info.
1100 */
1101static int dump_write(struct file *file, const void *addr, int nr)
1102{
1103        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1104}
1105
1106static int dump_seek(struct file *file, off_t off)
1107{
1108        if (file->f_op->llseek) {
1109                if (file->f_op->llseek(file, off, 0) != off)
1110                        return 0;
1111        } else
1112                file->f_pos = off;
1113        return 1;
1114}
1115
1116/*
1117 * Decide whether a segment is worth dumping; default is yes to be
1118 * sure (missing info is worse than too much; etc).
1119 * Personally I'd include everything, and use the coredump limit...
1120 *
1121 * I think we should skip something. But I am not sure how. H.J.
1122 */
1123static int maydump(struct vm_area_struct *vma)
1124{
1125        /* Do not dump I/O mapped devices, shared memory, or special mappings */
1126        if (vma->vm_flags & (VM_IO | VM_SHARED | VM_RESERVED))
1127                return 0;
1128
1129        /* If it hasn't been written to, don't write it out */
1130        if (!vma->anon_vma)
1131                return 0;
1132
1133        return 1;
1134}
1135
 /*
  * Smallest multiple of y that is >= x, for non-negative integer operands.
  * y is expanded three times, so do not pass an expression with side
  * effects.
  */
 1136#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
1137
1138/* An ELF note in memory */
 1139struct memelfnote
 1140{
 1141        const char *name;       /* note name; writenote() emits it including the NUL */
 1142        int type;               /* stored into n_type when the note is written */
 1143        unsigned int datasz;    /* size in bytes of the payload at data */
 1144        void *data;             /* payload written after the name */
 1145};
1146
1147static int notesize(struct memelfnote *en)
1148{
1149        int sz;
1150
1151        sz = sizeof(struct elf_note);
1152        sz += roundup(strlen(en->name) + 1, 4);
1153        sz += roundup(en->datasz, 4);
1154
1155        return sz;
1156}
1157
1158#define DUMP_WRITE(addr, nr)    \
1159        do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1160#define DUMP_SEEK(off)  \
1161        do { if (!dump_seek(file, (off))) return 0; } while(0)
1162
1163static int writenote(struct memelfnote *men, struct file *file)
1164{
1165        struct elf_note en;
1166
1167        en.n_namesz = strlen(men->name) + 1;
1168        en.n_descsz = men->datasz;
1169        en.n_type = men->type;
1170
1171        DUMP_WRITE(&en, sizeof(en));
1172        DUMP_WRITE(men->name, en.n_namesz);
1173        /* XXX - cast from long long to long to avoid need for libgcc.a */
1174        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1175        DUMP_WRITE(men->data, men->datasz);
1176        DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));      /* XXX */
1177
1178        return 1;
1179}
1180#undef DUMP_WRITE
1181#undef DUMP_SEEK
1182
 /*
  * Write/seek helpers that jump to the end_coredump label on failure.
  * They rely on `file`, `size`, `limit` and an `end_coredump` label being
  * available where they are expanded.
  *
  * DUMP_WRITE adds nr to size before comparing against limit, so size is
  * charged even when the limit check then prevents the write.
  *
  * Both expand to a bare `if` statement (no do { } while (0) wrapper), so
  * do not use them as the unbraced body of an if that has an else.
  */
 1183#define DUMP_WRITE(addr, nr)    \
 1184        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
 1185                goto end_coredump;
 1186#define DUMP_SEEK(off)  \
 1187        if (!dump_seek(file, (off))) \
 1188                goto end_coredump;
1189
1190static inline void fill_elf_header(struct elfhdr *elf, int segs)
1191{
1192        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1193        elf->e_ident[EI_CLASS] = ELF_CLASS;
1194        elf->e_ident[EI_DATA] = ELF_DATA;
1195        elf->e_ident[EI_VERSION] = EV_CURRENT;
1196        elf->e_ident[EI_OSABI] = ELF_OSABI;
1197        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1198
1199        elf->e_type = ET_CORE;
1200        elf->e_machine = ELF_ARCH;
1201        elf->e_version = EV_CURRENT;
1202        elf->e_entry = 0;
1203        elf->e_phoff = sizeof(struct elfhdr);
1204        elf->e_shoff = 0;
1205        elf->e_flags = ELF_CORE_EFLAGS;
1206        elf->e_ehsize = sizeof(struct elfhdr);
1207        elf->e_phentsize = sizeof(struct elf_phdr);
1208        elf->e_phnum = segs;
1209        elf->e_shentsize = 0;
1210        elf->e_shnum = 0;
1211        elf->e_shstrndx = 0;
1212        return;
1213}
1214
1215static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1216{
1217        phdr->p_type = PT_NOTE;
1218        phdr->p_offset = offset;
1219        phdr->p_vaddr = 0;
1220        phdr->p_paddr = 0;
1221        phdr->p_filesz = sz;
1222        phdr->p_memsz = 0;
1223        phdr->p_flags = 0;
1224        phdr->p_align = 0;
1225        return;
1226}
1227
1228static void fill_note(struct memelfnote *note, const char *name, int type, 
1229                unsigned int sz, void *data)
1230{
1231        note->name = name;
1232        note->type = type;
1233        note->datasz = sz;
1234        note->data = data;
1235        return;
1236}
1237
1238/*
1239 * fill up all the fields in prstatus from the given task struct, except registers
1240 * which need to be filled up separately.
1241 */
1242static void fill_prstatus(struct elf_prstatus *prstatus,
1243                        struct task_struct *p, long signr) 
1244{
1245        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1246        prstatus->pr_sigpend = p->pending.signal.sig[0];
1247        prstatus->pr_sighold = p->blocked.sig[0];
1248        prstatus->pr_pid = p->pid;
1249        prstatus->pr_ppid = p->parent->pid;
1250        prstatus->pr_pgrp = process_group(p);
1251        prstatus->pr_sid = p->signal->session;
1252        if (thread_group_leader(p)) {
1253                /*
1254                 * This is the record for the group leader.  Add in the
1255                 * cumulative times of previous dead threads.  This total
1256                 * won't include the time of each live thread whose state
1257                 * is included in the core dump.  The final total reported
1258                 * to our parent process when it calls wait4 will include
1259                 * those sums as well as the little bit more time it takes
1260                 * this and each other thread to finish dying after the
1261                 * core dump synchronization phase.
1262                 */
1263                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1264                                   &prstatus->pr_utime);
1265                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1266                                   &prstatus->pr_stime);
1267        } else {
1268                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1269                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1270        }
1271        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1272        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1273}
1274
1275static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1276                       struct mm_struct *mm)
1277{
1278        int i, len;
1279        
1280        /* first copy the parameters from user space */
1281        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1282
1283        len = mm->arg_end - mm->arg_start;
1284        if (len >= ELF_PRARGSZ)
1285                len = ELF_PRARGSZ-1;
1286        if (copy_from_user(&psinfo->pr_psargs,
1287                           (const char __user *)mm->arg_start, len))
1288                return -EFAULT;
1289        for(i = 0; i < len; i++)
1290                if (psinfo->pr_psargs[i] == 0)
1291                        psinfo->pr_psargs[i] = ' ';
1292        psinfo->pr_psargs[len] = 0;
1293
1294        psinfo->pr_pid = p->pid;
1295        psinfo->pr_ppid = p->parent->pid;
1296        psinfo->pr_pgrp = process_group(p);
1297        psinfo->pr_sid = p->signal->session;
1298
1299        i = p->state ? ffz(~p->state) + 1 : 0;
1300        psinfo->pr_state = i;
1301        psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i];
1302        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1303        psinfo->pr_nice = task_nice(p);
1304        psinfo->pr_flag = p->flags;
1305        SET_UID(psinfo->pr_uid, p->uid);
1306        SET_GID(psinfo->pr_gid, p->gid);
1307        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1308        
1309        return 0;
1310}
1311
1312/* Here is the structure in which status of each thread is captured. */
 1313struct elf_thread_status
 1314{
 1315        struct list_head list;          /* link in elf_core_dump()'s thread_list */
 1316        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
 1317        elf_fpregset_t fpu;             /* NT_PRFPREG */
 1318        struct task_struct *thread;     /* task this record describes */
 1319#ifdef ELF_CORE_COPY_XFPREGS
 1320        elf_fpxregset_t xfpu;           /* NT_PRXFPREG */
 1321#endif
 1322        struct memelfnote notes[3];     /* notes set up by elf_dump_thread_status() */
 1323        int num_notes;                  /* number of notes recorded in notes[] */
 1324};
1325
1326/*
1327 * In order to add the specific thread information for the elf file format,
1328 * we need to keep a linked list of every threads pr_status and then
1329 * create a single section for them in the final core file.
1330 */
1331static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1332{
1333        int sz = 0;
1334        struct task_struct *p = t->thread;
1335        t->num_notes = 0;
1336
1337        fill_prstatus(&t->prstatus, p, signr);
1338        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1339        
1340        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1341        t->num_notes++;
1342        sz += notesize(&t->notes[0]);
1343
1344        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1345                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1346                t->num_notes++;
1347                sz += notesize(&t->notes[1]);
1348        }
1349
1350#ifdef ELF_CORE_COPY_XFPREGS
1351        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1352                fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1353                t->num_notes++;
1354                sz += notesize(&t->notes[2]);
1355        }
1356#endif  
1357        return sz;
1358}
1359
1360/*
1361 * Actual dumper
1362 *
1363 * This is a two-pass process; first we find the offsets of the bits,
1364 * and then they are actually written out.  If we run out of core limit
1365 * we just truncate.
1366 */
1367static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1368{
1369#define NUM_NOTES       6
1370        int has_dumped = 0;
1371        mm_segment_t fs;
1372        int segs;
1373        size_t size = 0;
1374        int i;
1375        struct vm_area_struct *vma;
1376        struct elfhdr *elf = NULL;
1377        off_t offset = 0, dataoff;
1378        unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1379        int numnote;
1380        struct memelfnote *notes = NULL;
1381        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1382        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1383        struct task_struct *g, *p;
1384        LIST_HEAD(thread_list);
1385        struct list_head *t;
1386        elf_fpregset_t *fpu = NULL;
1387#ifdef ELF_CORE_COPY_XFPREGS
1388        elf_fpxregset_t *xfpu = NULL;
1389#endif
1390        int thread_status_size = 0;
1391        elf_addr_t *auxv;
1392
1393        /*
1394         * We no longer stop all VM operations.
1395         * 
1396         * This is because those proceses that could possibly change map_count or
1397         * the mmap / vma pages are now blocked in do_exit on current finishing
1398         * this core dump.
1399         *
1400         * Only ptrace can touch these memory addresses, but it doesn't change
1401         * the map_count or the pages allocated.  So no possibility of crashing
1402         * exists while dumping the mm->vm_next areas to the core file.
1403         */
1404  
1405        /* alloc memory for large data structures: too large to be on stack */
1406        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1407        if (!elf)
1408                goto cleanup;
1409        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1410        if (!prstatus)
1411                goto cleanup;
1412        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1413        if (!psinfo)
1414                goto cleanup;
1415        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1416        if (!notes)
1417                goto cleanup;
1418        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1419        if (!fpu)
1420                goto cleanup;
1421#ifdef ELF_CORE_COPY_XFPREGS
1422        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1423        if (!xfpu)
1424                goto cleanup;
1425#endif
1426
1427        if (signr) {
1428                struct elf_thread_status *tmp;
1429                read_lock(&tasklist_lock);
1430                do_each_thread(g,p)
1431                        if (current->mm == p->mm && current != p) {
1432                                tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
1433                                if (!tmp) {
1434                                        read_unlock(&tasklist_lock);
1435                                        goto cleanup;
1436                                }
1437                                memset(tmp, 0, sizeof(*tmp));
1438                                INIT_LIST_HEAD(&tmp->list);
1439                                tmp->thread = p;
1440                                list_add(&tmp->list, &thread_list);
1441                        }
1442                while_each_thread(g,p);
1443                read_unlock(&tasklist_lock);
1444                list_for_each(t, &thread_list) {
1445                        struct elf_thread_status *tmp;
1446                        int sz;
1447
1448                        tmp = list_entry(t, struct elf_thread_status, list);
1449                        sz = elf_dump_thread_status(signr, tmp);
1450                        thread_status_size += sz;
1451                }
1452        }
1453        /* now collect the dump for the current */
1454        memset(prstatus, 0, sizeof(*prstatus));
1455        fill_prstatus(prstatus, current, signr);
1456        elf_core_copy_regs(&prstatus->pr_reg, regs);
1457        
1458        segs = current->mm->map_count;
1459#ifdef ELF_CORE_EXTRA_PHDRS
1460        segs += ELF_CORE_EXTRA_PHDRS;
1461#endif
1462
1463        /* Set up header */
1464        fill_elf_header(elf, segs+1);   /* including notes section */
1465
1466        has_dumped = 1;
1467        current->flags |= PF_DUMPCORE;
1468
1469        /*
1470         * Set up the notes in similar form to SVR4 core dumps made
1471         * with info from their /proc.
1472         */
1473
1474        fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1475        
1476        fill_psinfo(psinfo, current->group_leader, current->mm);
1477        fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1478        
1479        fill_note(notes +2, "CORE", NT_TASKSTRUCT, sizeof(*current), current);
1480  
1481        numnote = 3;
1482
1483        auxv = (elf_addr_t *) current->mm->saved_auxv;
1484
1485        i = 0;
1486        do
1487                i += 2;
1488        while (auxv[i - 2] != AT_NULL);
1489        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1490                  i * sizeof (elf_addr_t), auxv);
1491
1492        /* Try to dump the FPU. */
1493        if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
1494                fill_note(notes + numnote++,
1495                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1496#ifdef ELF_CORE_COPY_XFPREGS
1497        if (elf_core_copy_task_xfpregs(current, xfpu))
1498                fill_note(notes + numnote++,
1499                          "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1500#endif  
1501  
1502        fs = get_fs();
1503        set_fs(KERNEL_DS);
1504
1505        DUMP_WRITE(elf, sizeof(*elf));
1506        offset += sizeof(*elf);                         /* Elf header */
1507        offset += (segs+1) * sizeof(struct elf_phdr);   /* Program headers */
1508
1509        /* Write notes phdr entry */
1510        {
1511                struct elf_phdr phdr;
1512                int sz = 0;
1513
1514                for (i = 0; i < numnote; i++)
1515                        sz += notesize(notes + i);
1516                
1517                sz += thread_status_size;
1518
1519                fill_elf_note_phdr(&phdr, sz, offset);
1520                offset += sz;
1521                DUMP_WRITE(&phdr, sizeof(phdr));
1522        }
1523
1524        /* Page-align dumped data */
1525        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1526
1527        /* Write program headers for segments dump */
1528        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1529                struct elf_phdr phdr;
1530                size_t sz;
1531
1532                sz = vma->vm_end - vma->vm_start;
1533
1534                phdr.p_type = PT_LOAD;
1535                phdr.p_offset = offset;
1536                phdr.p_vaddr = vma->vm_start;
1537                phdr.p_paddr = 0;
1538                phdr.p_filesz = maydump(vma) ? sz : 0;
1539                phdr.p_memsz = sz;
1540                offset += phdr.p_filesz;
1541                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1542                if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
1543                if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
1544                phdr.p_align = ELF_EXEC_PAGESIZE;
1545
1546                DUMP_WRITE(&phdr, sizeof(phdr));
1547        }
1548
1549#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1550        ELF_CORE_WRITE_EXTRA_PHDRS;
1551#endif
1552
1553        /* write out the notes section */
1554        for (i = 0; i < numnote; i++)
1555                if (!writenote(notes + i, file))
1556                        goto end_coredump;
1557
1558        /* write out the thread status notes section */
1559        list_for_each(t, &thread_list) {
1560                struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
1561                for (i = 0; i < tmp->num_notes; i++)
1562                        if (!writenote(&tmp->notes[i], file))
1563                                goto end_coredump;
1564        }
1565 
1566        DUMP_SEEK(dataoff);
1567
1568        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1569                unsigned long addr;
1570
1571                if (!maydump(vma))
1572                        continue;
1573
1574                for (addr = vma->vm_start;
1575                     addr < vma->vm_end;
1576                     addr += PAGE_SIZE) {
1577                        struct page* page;
1578                        struct vm_area_struct *vma;
1579
1580                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1581                                                &page, &vma) <= 0) {
1582                                DUMP_SEEK (file->f_pos + PAGE_SIZE);
1583                        } else {
1584                                if (page == ZERO_PAGE(addr)) {
1585                                        DUMP_SEEK (file->f_pos + PAGE_SIZE);
1586                                } else {
1587                                        void *kaddr;
1588                                        flush_cache_page(vma, addr);
1589                                        kaddr = kmap(page);
1590                                        if ((size += PAGE_SIZE) > limit ||
1591                                            !dump_write(file, kaddr,
1592                                            PAGE_SIZE)) {
1593                                                kunmap(page);
1594                                                page_cache_release(page);
1595                                                goto end_coredump;
1596                                        }
1597                                        kunmap(page);
1598                                }
1599                                page_cache_release(page);
1600                        }
1601                }
1602        }
1603
1604#ifdef ELF_CORE_WRITE_EXTRA_DATA
1605        ELF_CORE_WRITE_EXTRA_DATA;
1606#endif
1607
1608        if ((off_t) file->f_pos != offset) {
1609                /* Sanity check */
1610                printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1611                       (off_t) file->f_pos, offset);
1612        }
1613
1614end_coredump:
1615        set_fs(fs);
1616
1617cleanup:
1618        while(!list_empty(&thread_list)) {
1619                struct list_head *tmp = thread_list.next;
1620                list_del(tmp);
1621                kfree(list_entry(tmp, struct elf_thread_status, list));
1622        }
1623
1624        kfree(elf);
1625        kfree(prstatus);
1626        kfree(psinfo);
1627        kfree(notes);
1628        kfree(fpu);
1629#ifdef ELF_CORE_COPY_XFPREGS
1630        kfree(xfpu);
1631#endif
1632        return has_dumped;
1633#undef NUM_NOTES
1634}
1635
1636#endif          /* USE_ELF_CORE_DUMP */
1637
/*
 * Initialization hook for the ELF binary format handler.
 *
 * Hands the elf_format descriptor to register_binfmt() and returns that
 * call's result unchanged to the caller.
 */
1638static int __init init_elf_binfmt(void)
1639{
1640        return register_binfmt(&elf_format);
1641}
1642
/*
 * Exit hook for the ELF binary format handler.
 *
 * Passes the same elf_format descriptor that init_elf_binfmt() registered
 * to unregister_binfmt().  Returns nothing.
 */
1643static void __exit exit_elf_binfmt(void)
1644{
1645        /* Remove the ELF loader; elf_format is the only format unregistered here. */
1646        unregister_binfmt(&elf_format);
1647}
1648
/*
 * Hook-up of the routines above: init_elf_binfmt is handed to
 * core_initcall(), exit_elf_binfmt to module_exit(), and the license
 * string for this code is declared as "GPL".
 */
1649core_initcall(init_elf_binfmt);
1650module_exit(exit_elf_binfmt);
1651MODULE_LICENSE("GPL");
1652
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.