linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/compiler.h>
  35#include <linux/highmem.h>
  36#include <linux/pagemap.h>
  37#include <linux/security.h>
  38#include <linux/syscalls.h>
  39#include <linux/random.h>
  40#include <linux/elf.h>
  41#include <linux/utsname.h>
  42#include <asm/uaccess.h>
  43#include <asm/param.h>
  44#include <asm/page.h>
  45
  46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  47static int load_elf_library(struct file *);
  48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
  49
  50/*
  51 * If we don't support core dumping, then supply a NULL so we
  52 * don't even try.
  53 */
  54#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  55static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
  56#else
  57#define elf_core_dump   NULL
  58#endif
  59
  60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  61#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  62#else
  63#define ELF_MIN_ALIGN   PAGE_SIZE
  64#endif
  65
  66#ifndef ELF_CORE_EFLAGS
  67#define ELF_CORE_EFLAGS 0
  68#endif
  69
  70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  73
  74static struct linux_binfmt elf_format = {
  75                .module         = THIS_MODULE,
  76                .load_binary    = load_elf_binary,
  77                .load_shlib     = load_elf_library,
  78                .core_dump      = elf_core_dump,
  79                .min_coredump   = ELF_EXEC_PAGESIZE,
  80                .hasvdso        = 1
  81};
  82
  83#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  84
  85static int set_brk(unsigned long start, unsigned long end)
  86{
  87        start = ELF_PAGEALIGN(start);
  88        end = ELF_PAGEALIGN(end);
  89        if (end > start) {
  90                unsigned long addr;
  91                down_write(&current->mm->mmap_sem);
  92                addr = do_brk(start, end - start);
  93                up_write(&current->mm->mmap_sem);
  94                if (BAD_ADDR(addr))
  95                        return addr;
  96        }
  97        current->mm->start_brk = current->mm->brk = end;
  98        return 0;
  99}
 100
 101/* We need to explicitly zero any fractional pages
 102   after the data section (i.e. bss).  This would
 103   contain the junk from the file that should not
 104   be in memory
 105 */
 106static int padzero(unsigned long elf_bss)
 107{
 108        unsigned long nbyte;
 109
 110        nbyte = ELF_PAGEOFFSET(elf_bss);
 111        if (nbyte) {
 112                nbyte = ELF_MIN_ALIGN - nbyte;
 113                if (clear_user((void __user *) elf_bss, nbyte))
 114                        return -EFAULT;
 115        }
 116        return 0;
 117}
 118
 119/* Let's use some macros to make this stack manipulation a litle clearer */
 120#ifdef CONFIG_STACK_GROWSUP
 121#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 122#define STACK_ROUND(sp, items) \
 123        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 124#define STACK_ALLOC(sp, len) ({ \
 125        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
 126        old_sp; })
 127#else
 128#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 129#define STACK_ROUND(sp, items) \
 130        (((unsigned long) (sp - items)) &~ 15UL)
 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 132#endif
 133
 134static int
 135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 136                int interp_aout, unsigned long load_addr,
 137                unsigned long interp_load_addr)
 138{
 139        unsigned long p = bprm->p;
 140        int argc = bprm->argc;
 141        int envc = bprm->envc;
 142        elf_addr_t __user *argv;
 143        elf_addr_t __user *envp;
 144        elf_addr_t __user *sp;
 145        elf_addr_t __user *u_platform;
 146        const char *k_platform = ELF_PLATFORM;
 147        int items;
 148        elf_addr_t *elf_info;
 149        int ei_index = 0;
 150        struct task_struct *tsk = current;
 151
 152        /*
 153         * If this architecture has a platform capability string, copy it
 154         * to userspace.  In some cases (Sparc), this info is impossible
 155         * for userspace to get any other way, in others (i386) it is
 156         * merely difficult.
 157         */
 158        u_platform = NULL;
 159        if (k_platform) {
 160                size_t len = strlen(k_platform) + 1;
 161
 162                /*
 163                 * In some cases (e.g. Hyper-Threading), we want to avoid L1
 164                 * evictions by the processes running on the same package. One
 165                 * thing we can do is to shuffle the initial stack for them.
 166                 */
 167
 168                p = arch_align_stack(p);
 169
 170                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 171                if (__copy_to_user(u_platform, k_platform, len))
 172                        return -EFAULT;
 173        }
 174
 175        /* Create the ELF interpreter info */
 176        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 177#define NEW_AUX_ENT(id, val) \
 178        do { \
 179                elf_info[ei_index++] = id; \
 180                elf_info[ei_index++] = val; \
 181        } while (0)
 182
 183#ifdef ARCH_DLINFO
 184        /* 
 185         * ARCH_DLINFO must come first so PPC can do its special alignment of
 186         * AUXV.
 187         */
 188        ARCH_DLINFO;
 189#endif
 190        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 191        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 192        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 193        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 194        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 195        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 196        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 197        NEW_AUX_ENT(AT_FLAGS, 0);
 198        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 199        NEW_AUX_ENT(AT_UID, tsk->uid);
 200        NEW_AUX_ENT(AT_EUID, tsk->euid);
 201        NEW_AUX_ENT(AT_GID, tsk->gid);
 202        NEW_AUX_ENT(AT_EGID, tsk->egid);
 203        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 204        if (k_platform) {
 205                NEW_AUX_ENT(AT_PLATFORM,
 206                            (elf_addr_t)(unsigned long)u_platform);
 207        }
 208        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 209                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 210        }
 211#undef NEW_AUX_ENT
 212        /* AT_NULL is zero; clear the rest too */
 213        memset(&elf_info[ei_index], 0,
 214               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 215
 216        /* And advance past the AT_NULL entry.  */
 217        ei_index += 2;
 218
 219        sp = STACK_ADD(p, ei_index);
 220
 221        items = (argc + 1) + (envc + 1);
 222        if (interp_aout) {
 223                items += 3; /* a.out interpreters require argv & envp too */
 224        } else {
 225                items += 1; /* ELF interpreters only put argc on the stack */
 226        }
 227        bprm->p = STACK_ROUND(sp, items);
 228
 229        /* Point sp at the lowest address on the stack */
 230#ifdef CONFIG_STACK_GROWSUP
 231        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 232        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 233#else
 234        sp = (elf_addr_t __user *)bprm->p;
 235#endif
 236
 237        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 238        if (__put_user(argc, sp++))
 239                return -EFAULT;
 240        if (interp_aout) {
 241                argv = sp + 2;
 242                envp = argv + argc + 1;
 243                if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
 244                    __put_user((elf_addr_t)(unsigned long)envp, sp++))
 245                        return -EFAULT;
 246        } else {
 247                argv = sp;
 248                envp = argv + argc + 1;
 249        }
 250
 251        /* Populate argv and envp */
 252        p = current->mm->arg_end = current->mm->arg_start;
 253        while (argc-- > 0) {
 254                size_t len;
 255                if (__put_user((elf_addr_t)p, argv++))
 256                        return -EFAULT;
 257                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 258                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 259                        return 0;
 260                p += len;
 261        }
 262        if (__put_user(0, argv))
 263                return -EFAULT;
 264        current->mm->arg_end = current->mm->env_start = p;
 265        while (envc-- > 0) {
 266                size_t len;
 267                if (__put_user((elf_addr_t)p, envp++))
 268                        return -EFAULT;
 269                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 270                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 271                        return 0;
 272                p += len;
 273        }
 274        if (__put_user(0, envp))
 275                return -EFAULT;
 276        current->mm->env_end = p;
 277
 278        /* Put the elf_info on the stack in the right place.  */
 279        sp = (elf_addr_t __user *)envp + 1;
 280        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 281                return -EFAULT;
 282        return 0;
 283}
 284
 285#ifndef elf_map
 286
 287static unsigned long elf_map(struct file *filep, unsigned long addr,
 288                struct elf_phdr *eppnt, int prot, int type)
 289{
 290        unsigned long map_addr;
 291        unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 292
 293        down_write(&current->mm->mmap_sem);
 294        /* mmap() will return -EINVAL if given a zero size, but a
 295         * segment with zero filesize is perfectly valid */
 296        if (eppnt->p_filesz + pageoffset)
 297                map_addr = do_mmap(filep, ELF_PAGESTART(addr),
 298                                   eppnt->p_filesz + pageoffset, prot, type,
 299                                   eppnt->p_offset - pageoffset);
 300        else
 301                map_addr = ELF_PAGESTART(addr);
 302        up_write(&current->mm->mmap_sem);
 303        return(map_addr);
 304}
 305
 306#endif /* !elf_map */
 307
 308/* This is much more generalized than the library routine read function,
 309   so we keep this separate.  Technically the library read function
 310   is only provided so that we can read a.out libraries that have
 311   an ELF header */
 312
 313static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 314                struct file *interpreter, unsigned long *interp_load_addr)
 315{
 316        struct elf_phdr *elf_phdata;
 317        struct elf_phdr *eppnt;
 318        unsigned long load_addr = 0;
 319        int load_addr_set = 0;
 320        unsigned long last_bss = 0, elf_bss = 0;
 321        unsigned long error = ~0UL;
 322        int retval, i, size;
 323
 324        /* First of all, some simple consistency checks */
 325        if (interp_elf_ex->e_type != ET_EXEC &&
 326            interp_elf_ex->e_type != ET_DYN)
 327                goto out;
 328        if (!elf_check_arch(interp_elf_ex))
 329                goto out;
 330        if (!interpreter->f_op || !interpreter->f_op->mmap)
 331                goto out;
 332
 333        /*
 334         * If the size of this structure has changed, then punt, since
 335         * we will be doing the wrong thing.
 336         */
 337        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 338                goto out;
 339        if (interp_elf_ex->e_phnum < 1 ||
 340                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 341                goto out;
 342
 343        /* Now read in all of the header information */
 344        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 345        if (size > ELF_MIN_ALIGN)
 346                goto out;
 347        elf_phdata = kmalloc(size, GFP_KERNEL);
 348        if (!elf_phdata)
 349                goto out;
 350
 351        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 352                             (char *)elf_phdata,size);
 353        error = -EIO;
 354        if (retval != size) {
 355                if (retval < 0)
 356                        error = retval; 
 357                goto out_close;
 358        }
 359
 360        eppnt = elf_phdata;
 361        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 362                if (eppnt->p_type == PT_LOAD) {
 363                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 364                        int elf_prot = 0;
 365                        unsigned long vaddr = 0;
 366                        unsigned long k, map_addr;
 367
 368                        if (eppnt->p_flags & PF_R)
 369                                elf_prot = PROT_READ;
 370                        if (eppnt->p_flags & PF_W)
 371                                elf_prot |= PROT_WRITE;
 372                        if (eppnt->p_flags & PF_X)
 373                                elf_prot |= PROT_EXEC;
 374                        vaddr = eppnt->p_vaddr;
 375                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 376                                elf_type |= MAP_FIXED;
 377
 378                        map_addr = elf_map(interpreter, load_addr + vaddr,
 379                                           eppnt, elf_prot, elf_type);
 380                        error = map_addr;
 381                        if (BAD_ADDR(map_addr))
 382                                goto out_close;
 383
 384                        if (!load_addr_set &&
 385                            interp_elf_ex->e_type == ET_DYN) {
 386                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 387                                load_addr_set = 1;
 388                        }
 389
 390                        /*
 391                         * Check to see if the section's size will overflow the
 392                         * allowed task size. Note that p_filesz must always be
 393                         * <= p_memsize so it's only necessary to check p_memsz.
 394                         */
 395                        k = load_addr + eppnt->p_vaddr;
 396                        if (BAD_ADDR(k) ||
 397                            eppnt->p_filesz > eppnt->p_memsz ||
 398                            eppnt->p_memsz > TASK_SIZE ||
 399                            TASK_SIZE - eppnt->p_memsz < k) {
 400                                error = -ENOMEM;
 401                                goto out_close;
 402                        }
 403
 404                        /*
 405                         * Find the end of the file mapping for this phdr, and
 406                         * keep track of the largest address we see for this.
 407                         */
 408                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 409                        if (k > elf_bss)
 410                                elf_bss = k;
 411
 412                        /*
 413                         * Do the same thing for the memory mapping - between
 414                         * elf_bss and last_bss is the bss section.
 415                         */
 416                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 417                        if (k > last_bss)
 418                                last_bss = k;
 419                }
 420        }
 421
 422        /*
 423         * Now fill out the bss section.  First pad the last page up
 424         * to the page boundary, and then perform a mmap to make sure
 425         * that there are zero-mapped pages up to and including the 
 426         * last bss page.
 427         */
 428        if (padzero(elf_bss)) {
 429                error = -EFAULT;
 430                goto out_close;
 431        }
 432
 433        /* What we have mapped so far */
 434        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 435
 436        /* Map the last of the bss segment */
 437        if (last_bss > elf_bss) {
 438                down_write(&current->mm->mmap_sem);
 439                error = do_brk(elf_bss, last_bss - elf_bss);
 440                up_write(&current->mm->mmap_sem);
 441                if (BAD_ADDR(error))
 442                        goto out_close;
 443        }
 444
 445        *interp_load_addr = load_addr;
 446        error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
 447
 448out_close:
 449        kfree(elf_phdata);
 450out:
 451        return error;
 452}
 453
 454static unsigned long load_aout_interp(struct exec *interp_ex,
 455                struct file *interpreter)
 456{
 457        unsigned long text_data, elf_entry = ~0UL;
 458        char __user * addr;
 459        loff_t offset;
 460
 461        current->mm->end_code = interp_ex->a_text;
 462        text_data = interp_ex->a_text + interp_ex->a_data;
 463        current->mm->end_data = text_data;
 464        current->mm->brk = interp_ex->a_bss + text_data;
 465
 466        switch (N_MAGIC(*interp_ex)) {
 467        case OMAGIC:
 468                offset = 32;
 469                addr = (char __user *)0;
 470                break;
 471        case ZMAGIC:
 472        case QMAGIC:
 473                offset = N_TXTOFF(*interp_ex);
 474                addr = (char __user *)N_TXTADDR(*interp_ex);
 475                break;
 476        default:
 477                goto out;
 478        }
 479
 480        down_write(&current->mm->mmap_sem);     
 481        do_brk(0, text_data);
 482        up_write(&current->mm->mmap_sem);
 483        if (!interpreter->f_op || !interpreter->f_op->read)
 484                goto out;
 485        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 486                goto out;
 487        flush_icache_range((unsigned long)addr,
 488                           (unsigned long)addr + text_data);
 489
 490        down_write(&current->mm->mmap_sem);     
 491        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 492                interp_ex->a_bss);
 493        up_write(&current->mm->mmap_sem);
 494        elf_entry = interp_ex->a_entry;
 495
 496out:
 497        return elf_entry;
 498}
 499
 500/*
 501 * These are the functions used to load ELF style executables and shared
 502 * libraries.  There is no binary dependent code anywhere else.
 503 */
 504
 505#define INTERPRETER_NONE 0
 506#define INTERPRETER_AOUT 1
 507#define INTERPRETER_ELF 2
 508
 509#ifndef STACK_RND_MASK
 510#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
 511#endif
 512
 513static unsigned long randomize_stack_top(unsigned long stack_top)
 514{
 515        unsigned int random_variable = 0;
 516
 517        if ((current->flags & PF_RANDOMIZE) &&
 518                !(current->personality & ADDR_NO_RANDOMIZE)) {
 519                random_variable = get_random_int() & STACK_RND_MASK;
 520                random_variable <<= PAGE_SHIFT;
 521        }
 522#ifdef CONFIG_STACK_GROWSUP
 523        return PAGE_ALIGN(stack_top) + random_variable;
 524#else
 525        return PAGE_ALIGN(stack_top) - random_variable;
 526#endif
 527}
 528
 529static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 530{
 531        struct file *interpreter = NULL; /* to shut gcc up */
 532        unsigned long load_addr = 0, load_bias = 0;
 533        int load_addr_set = 0;
 534        char * elf_interpreter = NULL;
 535        unsigned int interpreter_type = INTERPRETER_NONE;
 536        unsigned char ibcs2_interpreter = 0;
 537        unsigned long error;
 538        struct elf_phdr *elf_ppnt, *elf_phdata;
 539        unsigned long elf_bss, elf_brk;
 540        int elf_exec_fileno;
 541        int retval, i;
 542        unsigned int size;
 543        unsigned long elf_entry, interp_load_addr = 0;
 544        unsigned long start_code, end_code, start_data, end_data;
 545        unsigned long reloc_func_desc = 0;
 546        char passed_fileno[6];
 547        struct files_struct *files;
 548        int executable_stack = EXSTACK_DEFAULT;
 549        unsigned long def_flags = 0;
 550        struct {
 551                struct elfhdr elf_ex;
 552                struct elfhdr interp_elf_ex;
 553                struct exec interp_ex;
 554        } *loc;
 555
 556        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 557        if (!loc) {
 558                retval = -ENOMEM;
 559                goto out_ret;
 560        }
 561        
 562        /* Get the exec-header */
 563        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 564
 565        retval = -ENOEXEC;
 566        /* First of all, some simple consistency checks */
 567        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 568                goto out;
 569
 570        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 571                goto out;
 572        if (!elf_check_arch(&loc->elf_ex))
 573                goto out;
 574        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 575                goto out;
 576
 577        /* Now read in all of the header information */
 578        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 579                goto out;
 580        if (loc->elf_ex.e_phnum < 1 ||
 581                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 582                goto out;
 583        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 584        retval = -ENOMEM;
 585        elf_phdata = kmalloc(size, GFP_KERNEL);
 586        if (!elf_phdata)
 587                goto out;
 588
 589        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 590                             (char *)elf_phdata, size);
 591        if (retval != size) {
 592                if (retval >= 0)
 593                        retval = -EIO;
 594                goto out_free_ph;
 595        }
 596
 597        files = current->files; /* Refcounted so ok */
 598        retval = unshare_files();
 599        if (retval < 0)
 600                goto out_free_ph;
 601        if (files == current->files) {
 602                put_files_struct(files);
 603                files = NULL;
 604        }
 605
 606        /* exec will make our files private anyway, but for the a.out
 607           loader stuff we need to do it earlier */
 608        retval = get_unused_fd();
 609        if (retval < 0)
 610                goto out_free_fh;
 611        get_file(bprm->file);
 612        fd_install(elf_exec_fileno = retval, bprm->file);
 613
 614        elf_ppnt = elf_phdata;
 615        elf_bss = 0;
 616        elf_brk = 0;
 617
 618        start_code = ~0UL;
 619        end_code = 0;
 620        start_data = 0;
 621        end_data = 0;
 622
 623        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 624                if (elf_ppnt->p_type == PT_INTERP) {
 625                        /* This is the program interpreter used for
 626                         * shared libraries - for now assume that this
 627                         * is an a.out format binary
 628                         */
 629                        retval = -ENOEXEC;
 630                        if (elf_ppnt->p_filesz > PATH_MAX || 
 631                            elf_ppnt->p_filesz < 2)
 632                                goto out_free_file;
 633
 634                        retval = -ENOMEM;
 635                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 636                                                  GFP_KERNEL);
 637                        if (!elf_interpreter)
 638                                goto out_free_file;
 639
 640                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 641                                             elf_interpreter,
 642                                             elf_ppnt->p_filesz);
 643                        if (retval != elf_ppnt->p_filesz) {
 644                                if (retval >= 0)
 645                                        retval = -EIO;
 646                                goto out_free_interp;
 647                        }
 648                        /* make sure path is NULL terminated */
 649                        retval = -ENOEXEC;
 650                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 651                                goto out_free_interp;
 652
 653                        /* If the program interpreter is one of these two,
 654                         * then assume an iBCS2 image. Otherwise assume
 655                         * a native linux image.
 656                         */
 657                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 658                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 659                                ibcs2_interpreter = 1;
 660
 661                        /*
 662                         * The early SET_PERSONALITY here is so that the lookup
 663                         * for the interpreter happens in the namespace of the 
 664                         * to-be-execed image.  SET_PERSONALITY can select an
 665                         * alternate root.
 666                         *
 667                         * However, SET_PERSONALITY is NOT allowed to switch
 668                         * this task into the new images's memory mapping
 669                         * policy - that is, TASK_SIZE must still evaluate to
 670                         * that which is appropriate to the execing application.
 671                         * This is because exit_mmap() needs to have TASK_SIZE
 672                         * evaluate to the size of the old image.
 673                         *
 674                         * So if (say) a 64-bit application is execing a 32-bit
 675                         * application it is the architecture's responsibility
 676                         * to defer changing the value of TASK_SIZE until the
 677                         * switch really is going to happen - do this in
 678                         * flush_thread().      - akpm
 679                         */
 680                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 681
 682                        interpreter = open_exec(elf_interpreter);
 683                        retval = PTR_ERR(interpreter);
 684                        if (IS_ERR(interpreter))
 685                                goto out_free_interp;
 686
 687                        /*
 688                         * If the binary is not readable then enforce
 689                         * mm->dumpable = 0 regardless of the interpreter's
 690                         * permissions.
 691                         */
 692                        if (file_permission(interpreter, MAY_READ) < 0)
 693                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 694
 695                        retval = kernel_read(interpreter, 0, bprm->buf,
 696                                             BINPRM_BUF_SIZE);
 697                        if (retval != BINPRM_BUF_SIZE) {
 698                                if (retval >= 0)
 699                                        retval = -EIO;
 700                                goto out_free_dentry;
 701                        }
 702
 703                        /* Get the exec headers */
 704                        loc->interp_ex = *((struct exec *)bprm->buf);
 705                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 706                        break;
 707                }
 708                elf_ppnt++;
 709        }
 710
 711        elf_ppnt = elf_phdata;
 712        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 713                if (elf_ppnt->p_type == PT_GNU_STACK) {
 714                        if (elf_ppnt->p_flags & PF_X)
 715                                executable_stack = EXSTACK_ENABLE_X;
 716                        else
 717                                executable_stack = EXSTACK_DISABLE_X;
 718                        break;
 719                }
 720
 721        /* Some simple consistency checks for the interpreter */
 722        if (elf_interpreter) {
 723                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 724
 725                /* Now figure out which format our binary is */
 726                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 727                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 728                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 729                        interpreter_type = INTERPRETER_ELF;
 730
 731                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 732                        interpreter_type &= ~INTERPRETER_ELF;
 733
 734                retval = -ELIBBAD;
 735                if (!interpreter_type)
 736                        goto out_free_dentry;
 737
 738                /* Make sure only one type was selected */
 739                if ((interpreter_type & INTERPRETER_ELF) &&
 740                     interpreter_type != INTERPRETER_ELF) {
 741                        // FIXME - ratelimit this before re-enabling
 742                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 743                        interpreter_type = INTERPRETER_ELF;
 744                }
 745                /* Verify the interpreter has a valid arch */
 746                if ((interpreter_type == INTERPRETER_ELF) &&
 747                    !elf_check_arch(&loc->interp_elf_ex))
 748                        goto out_free_dentry;
 749        } else {
 750                /* Executables without an interpreter also need a personality  */
 751                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 752        }
 753
 754        /* OK, we are done with that, now set up the arg stuff,
 755           and then start this sucker up */
 756        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 757                char *passed_p = passed_fileno;
 758                sprintf(passed_fileno, "%d", elf_exec_fileno);
 759
 760                if (elf_interpreter) {
 761                        retval = copy_strings_kernel(1, &passed_p, bprm);
 762                        if (retval)
 763                                goto out_free_dentry; 
 764                        bprm->argc++;
 765                }
 766        }
 767
 768        /* Flush all traces of the currently running executable */
 769        retval = flush_old_exec(bprm);
 770        if (retval)
 771                goto out_free_dentry;
 772
 773        /* Discard our unneeded old files struct */
 774        if (files) {
 775                put_files_struct(files);
 776                files = NULL;
 777        }
 778
 779        /* OK, This is the point of no return */
 780        current->mm->start_data = 0;
 781        current->mm->end_data = 0;
 782        current->mm->end_code = 0;
 783        current->mm->mmap = NULL;
 784        current->flags &= ~PF_FORKNOEXEC;
 785        current->mm->def_flags = def_flags;
 786
 787        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 788           may depend on the personality.  */
 789        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 790        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 791                current->personality |= READ_IMPLIES_EXEC;
 792
 793        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 794                current->flags |= PF_RANDOMIZE;
 795        arch_pick_mmap_layout(current->mm);
 796
 797        /* Do this so that we can load the interpreter, if need be.  We will
 798           change some of these later */
 799        current->mm->free_area_cache = current->mm->mmap_base;
 800        current->mm->cached_hole_size = 0;
 801        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 802                                 executable_stack);
 803        if (retval < 0) {
 804                send_sig(SIGKILL, current, 0);
 805                goto out_free_dentry;
 806        }
 807        
 808        current->mm->start_stack = bprm->p;
 809
 810        /* Now we do a little grungy work by mmaping the ELF image into
 811           the correct location in memory.  At this point, we assume that
 812           the image should be loaded at fixed address, not at a variable
 813           address. */
 814        for(i = 0, elf_ppnt = elf_phdata;
 815            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 816                int elf_prot = 0, elf_flags;
 817                unsigned long k, vaddr;
 818
 819                if (elf_ppnt->p_type != PT_LOAD)
 820                        continue;
 821
 822                if (unlikely (elf_brk > elf_bss)) {
 823                        unsigned long nbyte;
 824                    
 825                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 826                           before this one. Map anonymous pages, if needed,
 827                           and clear the area.  */
 828                        retval = set_brk (elf_bss + load_bias,
 829                                          elf_brk + load_bias);
 830                        if (retval) {
 831                                send_sig(SIGKILL, current, 0);
 832                                goto out_free_dentry;
 833                        }
 834                        nbyte = ELF_PAGEOFFSET(elf_bss);
 835                        if (nbyte) {
 836                                nbyte = ELF_MIN_ALIGN - nbyte;
 837                                if (nbyte > elf_brk - elf_bss)
 838                                        nbyte = elf_brk - elf_bss;
 839                                if (clear_user((void __user *)elf_bss +
 840                                                        load_bias, nbyte)) {
 841                                        /*
 842                                         * This bss-zeroing can fail if the ELF
 843                                         * file specifies odd protections. So
 844                                         * we don't check the return value
 845                                         */
 846                                }
 847                        }
 848                }
 849
 850                if (elf_ppnt->p_flags & PF_R)
 851                        elf_prot |= PROT_READ;
 852                if (elf_ppnt->p_flags & PF_W)
 853                        elf_prot |= PROT_WRITE;
 854                if (elf_ppnt->p_flags & PF_X)
 855                        elf_prot |= PROT_EXEC;
 856
 857                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 858
 859                vaddr = elf_ppnt->p_vaddr;
 860                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 861                        elf_flags |= MAP_FIXED;
 862                } else if (loc->elf_ex.e_type == ET_DYN) {
 863                        /* Try and get dynamic programs out of the way of the
 864                         * default mmap base, as well as whatever program they
 865                         * might try to exec.  This is because the brk will
 866                         * follow the loader, and is not movable.  */
 867                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 868                }
 869
 870                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 871                                elf_prot, elf_flags);
 872                if (BAD_ADDR(error)) {
 873                        send_sig(SIGKILL, current, 0);
 874                        retval = IS_ERR((void *)error) ?
 875                                PTR_ERR((void*)error) : -EINVAL;
 876                        goto out_free_dentry;
 877                }
 878
 879                if (!load_addr_set) {
 880                        load_addr_set = 1;
 881                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 882                        if (loc->elf_ex.e_type == ET_DYN) {
 883                                load_bias += error -
 884                                             ELF_PAGESTART(load_bias + vaddr);
 885                                load_addr += load_bias;
 886                                reloc_func_desc = load_bias;
 887                        }
 888                }
 889                k = elf_ppnt->p_vaddr;
 890                if (k < start_code)
 891                        start_code = k;
 892                if (start_data < k)
 893                        start_data = k;
 894
 895                /*
 896                 * Check to see if the section's size will overflow the
 897                 * allowed task size. Note that p_filesz must always be
 898                 * <= p_memsz so it is only necessary to check p_memsz.
 899                 */
 900                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 901                    elf_ppnt->p_memsz > TASK_SIZE ||
 902                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 903                        /* set_brk can never work. Avoid overflows. */
 904                        send_sig(SIGKILL, current, 0);
 905                        retval = -EINVAL;
 906                        goto out_free_dentry;
 907                }
 908
 909                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 910
 911                if (k > elf_bss)
 912                        elf_bss = k;
 913                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 914                        end_code = k;
 915                if (end_data < k)
 916                        end_data = k;
 917                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 918                if (k > elf_brk)
 919                        elf_brk = k;
 920        }
 921
 922        loc->elf_ex.e_entry += load_bias;
 923        elf_bss += load_bias;
 924        elf_brk += load_bias;
 925        start_code += load_bias;
 926        end_code += load_bias;
 927        start_data += load_bias;
 928        end_data += load_bias;
 929
 930        /* Calling set_brk effectively mmaps the pages that we need
 931         * for the bss and break sections.  We must do this before
 932         * mapping in the interpreter, to make sure it doesn't wind
 933         * up getting placed where the bss needs to go.
 934         */
 935        retval = set_brk(elf_bss, elf_brk);
 936        if (retval) {
 937                send_sig(SIGKILL, current, 0);
 938                goto out_free_dentry;
 939        }
 940        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 941                send_sig(SIGSEGV, current, 0);
 942                retval = -EFAULT; /* Nobody gets to see this, but.. */
 943                goto out_free_dentry;
 944        }
 945
 946        if (elf_interpreter) {
 947                if (interpreter_type == INTERPRETER_AOUT)
 948                        elf_entry = load_aout_interp(&loc->interp_ex,
 949                                                     interpreter);
 950                else
 951                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
 952                                                    interpreter,
 953                                                    &interp_load_addr);
 954                if (BAD_ADDR(elf_entry)) {
 955                        force_sig(SIGSEGV, current);
 956                        retval = IS_ERR((void *)elf_entry) ?
 957                                        (int)elf_entry : -EINVAL;
 958                        goto out_free_dentry;
 959                }
 960                reloc_func_desc = interp_load_addr;
 961
 962                allow_write_access(interpreter);
 963                fput(interpreter);
 964                kfree(elf_interpreter);
 965        } else {
 966                elf_entry = loc->elf_ex.e_entry;
 967                if (BAD_ADDR(elf_entry)) {
 968                        force_sig(SIGSEGV, current);
 969                        retval = -EINVAL;
 970                        goto out_free_dentry;
 971                }
 972        }
 973
 974        kfree(elf_phdata);
 975
 976        if (interpreter_type != INTERPRETER_AOUT)
 977                sys_close(elf_exec_fileno);
 978
 979        set_binfmt(&elf_format);
 980
 981#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 982        retval = arch_setup_additional_pages(bprm, executable_stack);
 983        if (retval < 0) {
 984                send_sig(SIGKILL, current, 0);
 985                goto out;
 986        }
 987#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 988
 989        compute_creds(bprm);
 990        current->flags &= ~PF_FORKNOEXEC;
 991        create_elf_tables(bprm, &loc->elf_ex,
 992                          (interpreter_type == INTERPRETER_AOUT),
 993                          load_addr, interp_load_addr);
 994        /* N.B. passed_fileno might not be initialized? */
 995        if (interpreter_type == INTERPRETER_AOUT)
 996                current->mm->arg_start += strlen(passed_fileno) + 1;
 997        current->mm->end_code = end_code;
 998        current->mm->start_code = start_code;
 999        current->mm->start_data = start_data;
1000        current->mm->end_data = end_data;
1001        current->mm->start_stack = bprm->p;
1002
1003        if (current->personality & MMAP_PAGE_ZERO) {
1004                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1005                   and some applications "depend" upon this behavior.
1006                   Since we do not have the power to recompile these, we
1007                   emulate the SVr4 behavior. Sigh. */
1008                down_write(&current->mm->mmap_sem);
1009                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1010                                MAP_FIXED | MAP_PRIVATE, 0);
1011                up_write(&current->mm->mmap_sem);
1012        }
1013
1014#ifdef ELF_PLAT_INIT
1015        /*
1016         * The ABI may specify that certain registers be set up in special
1017         * ways (on i386 %edx is the address of a DT_FINI function, for
1018         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1019         * that the e_entry field is the address of the function descriptor
1020         * for the startup routine, rather than the address of the startup
1021         * routine itself.  This macro performs whatever initialization to
1022         * the regs structure is required as well as any relocations to the
1023         * function descriptor entries when executing dynamically links apps.
1024         */
1025        ELF_PLAT_INIT(regs, reloc_func_desc);
1026#endif
1027
1028        start_thread(regs, elf_entry, bprm->p);
1029        if (unlikely(current->ptrace & PT_PTRACED)) {
1030                if (current->ptrace & PT_TRACE_EXEC)
1031                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1032                else
1033                        send_sig(SIGTRAP, current, 0);
1034        }
1035        retval = 0;
1036out:
1037        kfree(loc);
1038out_ret:
1039        return retval;
1040
1041        /* error cleanup */
1042out_free_dentry:
1043        allow_write_access(interpreter);
1044        if (interpreter)
1045                fput(interpreter);
1046out_free_interp:
1047        kfree(elf_interpreter);
1048out_free_file:
1049        sys_close(elf_exec_fileno);
1050out_free_fh:
1051        if (files)
1052                reset_files_struct(current, files);
1053out_free_ph:
1054        kfree(elf_phdata);
1055        goto out;
1056}
1057
1058/* This is really simpleminded and specialized - we are loading an
1059   a.out library that is given an ELF header. */
1060static int load_elf_library(struct file *file)
1061{
1062        struct elf_phdr *elf_phdata;
1063        struct elf_phdr *eppnt;
1064        unsigned long elf_bss, bss, len;
1065        int retval, error, i, j;
1066        struct elfhdr elf_ex;
1067
1068        error = -ENOEXEC;
1069        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1070        if (retval != sizeof(elf_ex))
1071                goto out;
1072
1073        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1074                goto out;
1075
1076        /* First of all, some simple consistency checks */
1077        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1078            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1079                goto out;
1080
1081        /* Now read in all of the header information */
1082
1083        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1084        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1085
1086        error = -ENOMEM;
1087        elf_phdata = kmalloc(j, GFP_KERNEL);
1088        if (!elf_phdata)
1089                goto out;
1090
1091        eppnt = elf_phdata;
1092        error = -ENOEXEC;
1093        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1094        if (retval != j)
1095                goto out_free_ph;
1096
1097        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1098                if ((eppnt + i)->p_type == PT_LOAD)
1099                        j++;
1100        if (j != 1)
1101                goto out_free_ph;
1102
1103        while (eppnt->p_type != PT_LOAD)
1104                eppnt++;
1105
1106        /* Now use mmap to map the library into memory. */
1107        down_write(&current->mm->mmap_sem);
1108        error = do_mmap(file,
1109                        ELF_PAGESTART(eppnt->p_vaddr),
1110                        (eppnt->p_filesz +
1111                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1112                        PROT_READ | PROT_WRITE | PROT_EXEC,
1113                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1114                        (eppnt->p_offset -
1115                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1116        up_write(&current->mm->mmap_sem);
1117        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1118                goto out_free_ph;
1119
1120        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1121        if (padzero(elf_bss)) {
1122                error = -EFAULT;
1123                goto out_free_ph;
1124        }
1125
1126        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1127                            ELF_MIN_ALIGN - 1);
1128        bss = eppnt->p_memsz + eppnt->p_vaddr;
1129        if (bss > len) {
1130                down_write(&current->mm->mmap_sem);
1131                do_brk(len, bss - len);
1132                up_write(&current->mm->mmap_sem);
1133        }
1134        error = 0;
1135
1136out_free_ph:
1137        kfree(elf_phdata);
1138out:
1139        return error;
1140}
1141
1142/*
1143 * Note that some platforms still use traditional core dumps and not
1144 * the ELF core dump.  Each platform can select it as appropriate.
1145 */
1146#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1147
1148/*
1149 * ELF core dumper
1150 *
1151 * Modelled on fs/exec.c:aout_core_dump()
1152 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1153 */
1154/*
1155 * These are the only things you should do on a core-file: use only these
1156 * functions to write out all the necessary info.
1157 */
1158static int dump_write(struct file *file, const void *addr, int nr)
1159{
1160        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1161}
1162
1163static int dump_seek(struct file *file, loff_t off)
1164{
1165        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1166                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1167                        return 0;
1168        } else {
1169                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1170                if (!buf)
1171                        return 0;
1172                while (off > 0) {
1173                        unsigned long n = off;
1174                        if (n > PAGE_SIZE)
1175                                n = PAGE_SIZE;
1176                        if (!dump_write(file, buf, n))
1177                                return 0;
1178                        off -= n;
1179                }
1180                free_page((unsigned long)buf);
1181        }
1182        return 1;
1183}
1184
1185/*
1186 * Decide whether a segment is worth dumping; default is yes to be
1187 * sure (missing info is worse than too much; etc).
1188 * Personally I'd include everything, and use the coredump limit...
1189 *
1190 * I think we should skip something. But I am not sure how. H.J.
1191 */
1192static int maydump(struct vm_area_struct *vma)
1193{
1194        /* The vma can be set up to tell us the answer directly.  */
1195        if (vma->vm_flags & VM_ALWAYSDUMP)
1196                return 1;
1197
1198        /* Do not dump I/O mapped devices or special mappings */
1199        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1200                return 0;
1201
1202        /* Dump shared memory only if mapped from an anonymous file. */
1203        if (vma->vm_flags & VM_SHARED)
1204                return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1205
1206        /* If it hasn't been written to, don't write it out */
1207        if (!vma->anon_vma)
1208                return 0;
1209
1210        return 1;
1211}
1212
/*
 * An ELF note in memory.
 *
 * Describes one note before it is serialized; the payload is referenced,
 * not copied.
 */
struct memelfnote
{
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type, e.g. NT_PRSTATUS */
        unsigned int datasz;    /* payload size in bytes */
        void *data;             /* payload */
};
1221
1222static int notesize(struct memelfnote *en)
1223{
1224        int sz;
1225
1226        sz = sizeof(struct elf_note);
1227        sz += roundup(strlen(en->name) + 1, 4);
1228        sz += roundup(en->datasz, 4);
1229
1230        return sz;
1231}
1232
/*
 * Note-writer flavour of DUMP_WRITE: write nr bytes from addr via
 * dump_write() and then advance *foffset by nr.  If the write fails the
 * ENCLOSING FUNCTION returns 0.  Expects a variable named `file' to be in
 * scope at the point of use.  Note that nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1235
1236static int alignfile(struct file *file, loff_t *foffset)
1237{
1238        static const char buf[4] = { 0, };
1239        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1240        return 1;
1241}
1242
1243static int writenote(struct memelfnote *men, struct file *file,
1244                        loff_t *foffset)
1245{
1246        struct elf_note en;
1247        en.n_namesz = strlen(men->name) + 1;
1248        en.n_descsz = men->datasz;
1249        en.n_type = men->type;
1250
1251        DUMP_WRITE(&en, sizeof(en), foffset);
1252        DUMP_WRITE(men->name, en.n_namesz, foffset);
1253        if (!alignfile(file, foffset))
1254                return 0;
1255        DUMP_WRITE(men->data, men->datasz, foffset);
1256        if (!alignfile(file, foffset))
1257                return 0;
1258
1259        return 1;
1260}
1261#undef DUMP_WRITE
1262
/*
 * Core-dump flavour of the write/seek helpers.  Both expect the caller to
 * provide variables named `file', `size' and `limit' and a label named
 * `end_coredump'.
 *
 * DUMP_WRITE adds nr to the running `size'; if that exceeds `limit', or
 * the write itself fails, control jumps to end_coredump.
 * DUMP_SEEK jumps to end_coredump if dump_seek() fails.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and stays safe inside an unbraced if/else.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if ((size += (nr)) > limit || \
                    !dump_write(file, (addr), (nr))) \
                        goto end_coredump; \
        } while (0)
#define DUMP_SEEK(off)  \
        do { \
                if (!dump_seek(file, (off))) \
                        goto end_coredump; \
        } while (0)
1269
1270static void fill_elf_header(struct elfhdr *elf, int segs)
1271{
1272        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1273        elf->e_ident[EI_CLASS] = ELF_CLASS;
1274        elf->e_ident[EI_DATA] = ELF_DATA;
1275        elf->e_ident[EI_VERSION] = EV_CURRENT;
1276        elf->e_ident[EI_OSABI] = ELF_OSABI;
1277        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1278
1279        elf->e_type = ET_CORE;
1280        elf->e_machine = ELF_ARCH;
1281        elf->e_version = EV_CURRENT;
1282        elf->e_entry = 0;
1283        elf->e_phoff = sizeof(struct elfhdr);
1284        elf->e_shoff = 0;
1285        elf->e_flags = ELF_CORE_EFLAGS;
1286        elf->e_ehsize = sizeof(struct elfhdr);
1287        elf->e_phentsize = sizeof(struct elf_phdr);
1288        elf->e_phnum = segs;
1289        elf->e_shentsize = 0;
1290        elf->e_shnum = 0;
1291        elf->e_shstrndx = 0;
1292        return;
1293}
1294
1295static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1296{
1297        phdr->p_type = PT_NOTE;
1298        phdr->p_offset = offset;
1299        phdr->p_vaddr = 0;
1300        phdr->p_paddr = 0;
1301        phdr->p_filesz = sz;
1302        phdr->p_memsz = 0;
1303        phdr->p_flags = 0;
1304        phdr->p_align = 0;
1305        return;
1306}
1307
1308static void fill_note(struct memelfnote *note, const char *name, int type, 
1309                unsigned int sz, void *data)
1310{
1311        note->name = name;
1312        note->type = type;
1313        note->datasz = sz;
1314        note->data = data;
1315        return;
1316}
1317
1318/*
1319 * fill up all the fields in prstatus from the given task struct, except
1320 * registers which need to be filled up separately.
1321 */
1322static void fill_prstatus(struct elf_prstatus *prstatus,
1323                struct task_struct *p, long signr)
1324{
1325        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1326        prstatus->pr_sigpend = p->pending.signal.sig[0];
1327        prstatus->pr_sighold = p->blocked.sig[0];
1328        prstatus->pr_pid = p->pid;
1329        prstatus->pr_ppid = p->parent->pid;
1330        prstatus->pr_pgrp = process_group(p);
1331        prstatus->pr_sid = process_session(p);
1332        if (thread_group_leader(p)) {
1333                /*
1334                 * This is the record for the group leader.  Add in the
1335                 * cumulative times of previous dead threads.  This total
1336                 * won't include the time of each live thread whose state
1337                 * is included in the core dump.  The final total reported
1338                 * to our parent process when it calls wait4 will include
1339                 * those sums as well as the little bit more time it takes
1340                 * this and each other thread to finish dying after the
1341                 * core dump synchronization phase.
1342                 */
1343                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1344                                   &prstatus->pr_utime);
1345                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1346                                   &prstatus->pr_stime);
1347        } else {
1348                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1349                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1350        }
1351        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1352        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1353}
1354
1355static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1356                       struct mm_struct *mm)
1357{
1358        unsigned int i, len;
1359        
1360        /* first copy the parameters from user space */
1361        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1362
1363        len = mm->arg_end - mm->arg_start;
1364        if (len >= ELF_PRARGSZ)
1365                len = ELF_PRARGSZ-1;
1366        if (copy_from_user(&psinfo->pr_psargs,
1367                           (const char __user *)mm->arg_start, len))
1368                return -EFAULT;
1369        for(i = 0; i < len; i++)
1370                if (psinfo->pr_psargs[i] == 0)
1371                        psinfo->pr_psargs[i] = ' ';
1372        psinfo->pr_psargs[len] = 0;
1373
1374        psinfo->pr_pid = p->pid;
1375        psinfo->pr_ppid = p->parent->pid;
1376        psinfo->pr_pgrp = process_group(p);
1377        psinfo->pr_sid = process_session(p);
1378
1379        i = p->state ? ffz(~p->state) + 1 : 0;
1380        psinfo->pr_state = i;
1381        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1382        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1383        psinfo->pr_nice = task_nice(p);
1384        psinfo->pr_flag = p->flags;
1385        SET_UID(psinfo->pr_uid, p->uid);
1386        SET_GID(psinfo->pr_gid, p->gid);
1387        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1388        
1389        return 0;
1390}
1391
/*
 * Here is the structure in which status of each thread is captured.
 * One instance exists per thread and is chained through `list'; the note
 * descriptors in notes[] point at the register buffers embedded here.
 */
struct elf_thread_status
{
        struct list_head list;          /* link in the per-dump thread list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* the task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* NT_PRXFPREG */
#endif
        struct memelfnote notes[3];     /* [0] prstatus, [1] fpu, [2] xfpu */
        int num_notes;                  /* incremented once per note filled in */
};
1405
1406/*
1407 * In order to add the specific thread information for the elf file format,
1408 * we need to keep a linked list of every threads pr_status and then create
1409 * a single section for them in the final core file.
1410 */
1411static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1412{
1413        int sz = 0;
1414        struct task_struct *p = t->thread;
1415        t->num_notes = 0;
1416
1417        fill_prstatus(&t->prstatus, p, signr);
1418        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1419        
1420        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1421                  &(t->prstatus));
1422        t->num_notes++;
1423        sz += notesize(&t->notes[0]);
1424
1425        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1426                                                                &t->fpu))) {
1427                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1428                          &(t->fpu));
1429                t->num_notes++;
1430                sz += notesize(&t->notes[1]);
1431        }
1432
1433#ifdef ELF_CORE_COPY_XFPREGS
1434        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1435                fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1436                          &t->xfpu);
1437                t->num_notes++;
1438                sz += notesize(&t->notes[2]);
1439        }
1440#endif  
1441        return sz;
1442}
1443
1444static struct vm_area_struct *first_vma(struct task_struct *tsk,
1445                                        struct vm_area_struct *gate_vma)
1446{
1447        struct vm_area_struct *ret = tsk->mm->mmap;
1448
1449        if (ret)
1450                return ret;
1451        return gate_vma;
1452}
1453/*
1454 * Helper function for iterating across a vma list.  It ensures that the caller
1455 * will visit `gate_vma' prior to terminating the search.
1456 */
1457static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1458                                        struct vm_area_struct *gate_vma)
1459{
1460        struct vm_area_struct *ret;
1461
1462        ret = this_vma->vm_next;
1463        if (ret)
1464                return ret;
1465        if (this_vma == gate_vma)
1466                return NULL;
1467        return gate_vma;
1468}
1469
1470/*
1471 * Actual dumper
1472 *
1473 * This is a two-pass process; first we find the offsets of the bits,
1474 * and then they are actually written out.  If we run out of core limit
1475 * we just truncate.
1476 */
1477static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1478{
1479#define NUM_NOTES       6
1480        int has_dumped = 0;
1481        mm_segment_t fs;
1482        int segs;
1483        size_t size = 0;
1484        int i;
1485        struct vm_area_struct *vma, *gate_vma;
1486        struct elfhdr *elf = NULL;
1487        loff_t offset = 0, dataoff, foffset;
1488        unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1489        int numnote;
1490        struct memelfnote *notes = NULL;
1491        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1492        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1493        struct task_struct *g, *p;
1494        LIST_HEAD(thread_list);
1495        struct list_head *t;
1496        elf_fpregset_t *fpu = NULL;
1497#ifdef ELF_CORE_COPY_XFPREGS
1498        elf_fpxregset_t *xfpu = NULL;
1499#endif
1500        int thread_status_size = 0;
1501        elf_addr_t *auxv;
1502#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1503        int extra_notes_size;
1504#endif
1505
1506        /*
1507         * We no longer stop all VM operations.
1508         * 
1509         * This is because those proceses that could possibly change map_count
1510         * or the mmap / vma pages are now blocked in do_exit on current
1511         * finishing this core dump.
1512         *
1513         * Only ptrace can touch these memory addresses, but it doesn't change
1514         * the map_count or the pages allocated. So no possibility of crashing
1515         * exists while dumping the mm->vm_next areas to the core file.
1516         */
1517  
1518        /* alloc memory for large data structures: too large to be on stack */
1519        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1520        if (!elf)
1521                goto cleanup;
1522        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1523        if (!prstatus)
1524                goto cleanup;
1525        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1526        if (!psinfo)
1527                goto cleanup;
1528        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1529        if (!notes)
1530                goto cleanup;
1531        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1532        if (!fpu)
1533                goto cleanup;
1534#ifdef ELF_CORE_COPY_XFPREGS
1535        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1536        if (!xfpu)
1537                goto cleanup;
1538#endif
1539
1540        if (signr) {
1541                struct elf_thread_status *tmp;
1542                rcu_read_lock();
1543                do_each_thread(g,p)
1544                        if (current->mm == p->mm && current != p) {
1545                                tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1546                                if (!tmp) {
1547                                        rcu_read_unlock();
1548                                        goto cleanup;
1549                                }
1550                                tmp->thread = p;
1551                                list_add(&tmp->list, &thread_list);
1552                        }
1553                while_each_thread(g,p);
1554                rcu_read_unlock();
1555                list_for_each(t, &thread_list) {
1556                        struct elf_thread_status *tmp;
1557                        int sz;
1558
1559                        tmp = list_entry(t, struct elf_thread_status, list);
1560                        sz = elf_dump_thread_status(signr, tmp);
1561                        thread_status_size += sz;
1562                }
1563        }
1564        /* now collect the dump for the current */
1565        memset(prstatus, 0, sizeof(*prstatus));
1566        fill_prstatus(prstatus, current, signr);
1567        elf_core_copy_regs(&prstatus->pr_reg, regs);
1568        
1569        segs = current->mm->map_count;
1570#ifdef ELF_CORE_EXTRA_PHDRS
1571        segs += ELF_CORE_EXTRA_PHDRS;
1572#endif
1573
1574        gate_vma = get_gate_vma(current);
1575        if (gate_vma != NULL)
1576                segs++;
1577
1578        /* Set up header */
1579        fill_elf_header(elf, segs + 1); /* including notes section */
1580
1581        has_dumped = 1;
1582        current->flags |= PF_DUMPCORE;
1583
1584        /*
1585         * Set up the notes in similar form to SVR4 core dumps made
1586         * with info from their /proc.
1587         */
1588
1589        fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1590        fill_psinfo(psinfo, current->group_leader, current->mm);
1591        fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1592        
1593        numnote = 2;
1594
1595        auxv = (elf_addr_t *)current->mm->saved_auxv;
1596
1597        i = 0;
1598        do
1599                i += 2;
1600        while (auxv[i - 2] != AT_NULL);
1601        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1602                  i * sizeof(elf_addr_t), auxv);
1603
1604        /* Try to dump the FPU. */
1605        if ((prstatus->pr_fpvalid =
1606             elf_core_copy_task_fpregs(current, regs, fpu)))
1607                fill_note(notes + numnote++,
1608                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1609#ifdef ELF_CORE_COPY_XFPREGS
1610        if (elf_core_copy_task_xfpregs(current, xfpu))
1611                fill_note(notes + numnote++,
1612                          "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1613#endif  
1614  
1615        fs = get_fs();
1616        set_fs(KERNEL_DS);
1617
1618        DUMP_WRITE(elf, sizeof(*elf));
1619        offset += sizeof(*elf);                         /* Elf header */
1620        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1621        foffset = offset;
1622
1623        /* Write notes phdr entry */
1624        {
1625                struct elf_phdr phdr;
1626                int sz = 0;
1627
1628                for (i = 0; i < numnote; i++)
1629                        sz += notesize(notes + i);
1630                
1631                sz += thread_status_size;
1632
1633#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1634                extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1635                sz += extra_notes_size;
1636#endif
1637
1638                fill_elf_note_phdr(&phdr, sz, offset);
1639                offset += sz;
1640                DUMP_WRITE(&phdr, sizeof(phdr));
1641        }
1642
1643        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1644
1645        /* Write program headers for segments dump */
1646        for (vma = first_vma(current, gate_vma); vma != NULL;
1647                        vma = next_vma(vma, gate_vma)) {
1648                struct elf_phdr phdr;
1649                size_t sz;
1650
1651                sz = vma->vm_end - vma->vm_start;
1652
1653                phdr.p_type = PT_LOAD;
1654                phdr.p_offset = offset;
1655                phdr.p_vaddr = vma->vm_start;
1656                phdr.p_paddr = 0;
1657                phdr.p_filesz = maydump(vma) ? sz : 0;
1658                phdr.p_memsz = sz;
1659                offset += phdr.p_filesz;
1660                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1661                if (vma->vm_flags & VM_WRITE)
1662                        phdr.p_flags |= PF_W;
1663                if (vma->vm_flags & VM_EXEC)
1664                        phdr.p_flags |= PF_X;
1665                phdr.p_align = ELF_EXEC_PAGESIZE;
1666
1667                DUMP_WRITE(&phdr, sizeof(phdr));
1668        }
1669
1670#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1671        ELF_CORE_WRITE_EXTRA_PHDRS;
1672#endif
1673
1674        /* write out the notes section */
1675        for (i = 0; i < numnote; i++)
1676                if (!writenote(notes + i, file, &foffset))
1677                        goto end_coredump;
1678
1679#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1680        ELF_CORE_WRITE_EXTRA_NOTES;
1681        foffset += extra_notes_size;
1682#endif
1683
1684        /* write out the thread status notes section */
1685        list_for_each(t, &thread_list) {
1686                struct elf_thread_status *tmp =
1687                                list_entry(t, struct elf_thread_status, list);
1688
1689                for (i = 0; i < tmp->num_notes; i++)
1690                        if (!writenote(&tmp->notes[i], file, &foffset))
1691                                goto end_coredump;
1692        }
1693
1694        /* Align to page */
1695        DUMP_SEEK(dataoff - foffset);
1696
1697        for (vma = first_vma(current, gate_vma); vma != NULL;
1698                        vma = next_vma(vma, gate_vma)) {
1699                unsigned long addr;
1700
1701                if (!maydump(vma))
1702                        continue;
1703
1704                for (addr = vma->vm_start;
1705                     addr < vma->vm_end;
1706                     addr += PAGE_SIZE) {
1707                        struct page *page;
1708                        struct vm_area_struct *vma;
1709
1710                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1711                                                &page, &vma) <= 0) {
1712                                DUMP_SEEK(PAGE_SIZE);
1713                        } else {
1714                                if (page == ZERO_PAGE(addr)) {
1715                                        if (!dump_seek(file, PAGE_SIZE)) {
1716                                                page_cache_release(page);
1717                                                goto end_coredump;
1718                                        }
1719                                } else {
1720                                        void *kaddr;
1721                                        flush_cache_page(vma, addr,
1722                                                         page_to_pfn(page));
1723                                        kaddr = kmap(page);
1724                                        if ((size += PAGE_SIZE) > limit ||
1725                                            !dump_write(file, kaddr,
1726                                            PAGE_SIZE)) {
1727                                                kunmap(page);
1728                                                page_cache_release(page);
1729                                                goto end_coredump;
1730                                        }
1731                                        kunmap(page);
1732                                }
1733                                page_cache_release(page);
1734                        }
1735                }
1736        }
1737
1738#ifdef ELF_CORE_WRITE_EXTRA_DATA
1739        ELF_CORE_WRITE_EXTRA_DATA;
1740#endif
1741
1742end_coredump:
1743        set_fs(fs);
1744
1745cleanup:
1746        while (!list_empty(&thread_list)) {
1747                struct list_head *tmp = thread_list.next;
1748                list_del(tmp);
1749                kfree(list_entry(tmp, struct elf_thread_status, list));
1750        }
1751
1752        kfree(elf);
1753        kfree(prstatus);
1754        kfree(psinfo);
1755        kfree(notes);
1756        kfree(fpu);
1757#ifdef ELF_CORE_COPY_XFPREGS
1758        kfree(xfpu);
1759#endif
1760        return has_dumped;
1761#undef NUM_NOTES
1762}
1763
1764#endif          /* USE_ELF_CORE_DUMP */
1765
1766static int __init init_elf_binfmt(void)
1767{
1768        return register_binfmt(&elf_format);
1769}
1770
1771static void __exit exit_elf_binfmt(void)
1772{
1773        /* Remove the COFF and ELF loaders. */
1774        unregister_binfmt(&elf_format);
1775}
1776
1777core_initcall(init_elf_binfmt);
1778module_exit(exit_elf_binfmt);
1779MODULE_LICENSE("GPL");
1780
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.