linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/smp_lock.h>
  35#include <linux/compiler.h>
  36#include <linux/highmem.h>
  37#include <linux/pagemap.h>
  38#include <linux/security.h>
  39#include <linux/syscalls.h>
  40#include <linux/random.h>
  41#include <linux/elf.h>
  42#include <asm/uaccess.h>
  43#include <asm/param.h>
  44#include <asm/page.h>
  45
  46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  47static int load_elf_library(struct file *);
  48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
  49
  50/*
  51 * If we don't support core dumping, then supply a NULL so we
  52 * don't even try.
  53 */
  54#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  55static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
  56#else
  57#define elf_core_dump   NULL
  58#endif
  59
  60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  61#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  62#else
  63#define ELF_MIN_ALIGN   PAGE_SIZE
  64#endif
  65
  66#ifndef ELF_CORE_EFLAGS
  67#define ELF_CORE_EFLAGS 0
  68#endif
  69
  70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  73
  74static struct linux_binfmt elf_format = {
  75                .module         = THIS_MODULE,
  76                .load_binary    = load_elf_binary,
  77                .load_shlib     = load_elf_library,
  78                .core_dump      = elf_core_dump,
  79                .min_coredump   = ELF_EXEC_PAGESIZE
  80};
  81
  82#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  83
  84static int set_brk(unsigned long start, unsigned long end)
  85{
  86        start = ELF_PAGEALIGN(start);
  87        end = ELF_PAGEALIGN(end);
  88        if (end > start) {
  89                unsigned long addr;
  90                down_write(&current->mm->mmap_sem);
  91                addr = do_brk(start, end - start);
  92                up_write(&current->mm->mmap_sem);
  93                if (BAD_ADDR(addr))
  94                        return addr;
  95        }
  96        current->mm->start_brk = current->mm->brk = end;
  97        return 0;
  98}
  99
 100/* We need to explicitly zero any fractional pages
 101   after the data section (i.e. bss).  This would
 102   contain the junk from the file that should not
 103   be in memory
 104 */
 105static int padzero(unsigned long elf_bss)
 106{
 107        unsigned long nbyte;
 108
 109        nbyte = ELF_PAGEOFFSET(elf_bss);
 110        if (nbyte) {
 111                nbyte = ELF_MIN_ALIGN - nbyte;
 112                if (clear_user((void __user *) elf_bss, nbyte))
 113                        return -EFAULT;
 114        }
 115        return 0;
 116}
 117
 118/* Let's use some macros to make this stack manipulation a litle clearer */
 119#ifdef CONFIG_STACK_GROWSUP
 120#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 121#define STACK_ROUND(sp, items) \
 122        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 123#define STACK_ALLOC(sp, len) ({ \
 124        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
 125        old_sp; })
 126#else
 127#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 128#define STACK_ROUND(sp, items) \
 129        (((unsigned long) (sp - items)) &~ 15UL)
 130#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 131#endif
 132
 133static int
 134create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 135                int interp_aout, unsigned long load_addr,
 136                unsigned long interp_load_addr)
 137{
 138        unsigned long p = bprm->p;
 139        int argc = bprm->argc;
 140        int envc = bprm->envc;
 141        elf_addr_t __user *argv;
 142        elf_addr_t __user *envp;
 143        elf_addr_t __user *sp;
 144        elf_addr_t __user *u_platform;
 145        const char *k_platform = ELF_PLATFORM;
 146        int items;
 147        elf_addr_t *elf_info;
 148        int ei_index = 0;
 149        struct task_struct *tsk = current;
 150
 151        /*
 152         * If this architecture has a platform capability string, copy it
 153         * to userspace.  In some cases (Sparc), this info is impossible
 154         * for userspace to get any other way, in others (i386) it is
 155         * merely difficult.
 156         */
 157        u_platform = NULL;
 158        if (k_platform) {
 159                size_t len = strlen(k_platform) + 1;
 160
 161                /*
 162                 * In some cases (e.g. Hyper-Threading), we want to avoid L1
 163                 * evictions by the processes running on the same package. One
 164                 * thing we can do is to shuffle the initial stack for them.
 165                 */
 166
 167                p = arch_align_stack(p);
 168
 169                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 170                if (__copy_to_user(u_platform, k_platform, len))
 171                        return -EFAULT;
 172        }
 173
 174        /* Create the ELF interpreter info */
 175        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 176#define NEW_AUX_ENT(id, val) \
 177        do { \
 178                elf_info[ei_index++] = id; \
 179                elf_info[ei_index++] = val; \
 180        } while (0)
 181
 182#ifdef ARCH_DLINFO
 183        /* 
 184         * ARCH_DLINFO must come first so PPC can do its special alignment of
 185         * AUXV.
 186         */
 187        ARCH_DLINFO;
 188#endif
 189        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 190        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 191        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 192        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 193        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 194        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 195        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 196        NEW_AUX_ENT(AT_FLAGS, 0);
 197        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 198        NEW_AUX_ENT(AT_UID, tsk->uid);
 199        NEW_AUX_ENT(AT_EUID, tsk->euid);
 200        NEW_AUX_ENT(AT_GID, tsk->gid);
 201        NEW_AUX_ENT(AT_EGID, tsk->egid);
 202        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 203        if (k_platform) {
 204                NEW_AUX_ENT(AT_PLATFORM,
 205                            (elf_addr_t)(unsigned long)u_platform);
 206        }
 207        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 208                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 209        }
 210#undef NEW_AUX_ENT
 211        /* AT_NULL is zero; clear the rest too */
 212        memset(&elf_info[ei_index], 0,
 213               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 214
 215        /* And advance past the AT_NULL entry.  */
 216        ei_index += 2;
 217
 218        sp = STACK_ADD(p, ei_index);
 219
 220        items = (argc + 1) + (envc + 1);
 221        if (interp_aout) {
 222                items += 3; /* a.out interpreters require argv & envp too */
 223        } else {
 224                items += 1; /* ELF interpreters only put argc on the stack */
 225        }
 226        bprm->p = STACK_ROUND(sp, items);
 227
 228        /* Point sp at the lowest address on the stack */
 229#ifdef CONFIG_STACK_GROWSUP
 230        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 231        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 232#else
 233        sp = (elf_addr_t __user *)bprm->p;
 234#endif
 235
 236        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 237        if (__put_user(argc, sp++))
 238                return -EFAULT;
 239        if (interp_aout) {
 240                argv = sp + 2;
 241                envp = argv + argc + 1;
 242                if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
 243                    __put_user((elf_addr_t)(unsigned long)envp, sp++))
 244                        return -EFAULT;
 245        } else {
 246                argv = sp;
 247                envp = argv + argc + 1;
 248        }
 249
 250        /* Populate argv and envp */
 251        p = current->mm->arg_end = current->mm->arg_start;
 252        while (argc-- > 0) {
 253                size_t len;
 254                if (__put_user((elf_addr_t)p, argv++))
 255                        return -EFAULT;
 256                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 257                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 258                        return 0;
 259                p += len;
 260        }
 261        if (__put_user(0, argv))
 262                return -EFAULT;
 263        current->mm->arg_end = current->mm->env_start = p;
 264        while (envc-- > 0) {
 265                size_t len;
 266                if (__put_user((elf_addr_t)p, envp++))
 267                        return -EFAULT;
 268                len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 269                if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 270                        return 0;
 271                p += len;
 272        }
 273        if (__put_user(0, envp))
 274                return -EFAULT;
 275        current->mm->env_end = p;
 276
 277        /* Put the elf_info on the stack in the right place.  */
 278        sp = (elf_addr_t __user *)envp + 1;
 279        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 280                return -EFAULT;
 281        return 0;
 282}
 283
 284#ifndef elf_map
 285
 286static unsigned long elf_map(struct file *filep, unsigned long addr,
 287                struct elf_phdr *eppnt, int prot, int type)
 288{
 289        unsigned long map_addr;
 290        unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 291
 292        down_write(&current->mm->mmap_sem);
 293        /* mmap() will return -EINVAL if given a zero size, but a
 294         * segment with zero filesize is perfectly valid */
 295        if (eppnt->p_filesz + pageoffset)
 296                map_addr = do_mmap(filep, ELF_PAGESTART(addr),
 297                                   eppnt->p_filesz + pageoffset, prot, type,
 298                                   eppnt->p_offset - pageoffset);
 299        else
 300                map_addr = ELF_PAGESTART(addr);
 301        up_write(&current->mm->mmap_sem);
 302        return(map_addr);
 303}
 304
 305#endif /* !elf_map */
 306
 307/* This is much more generalized than the library routine read function,
 308   so we keep this separate.  Technically the library read function
 309   is only provided so that we can read a.out libraries that have
 310   an ELF header */
 311
 312static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 313                struct file *interpreter, unsigned long *interp_load_addr)
 314{
 315        struct elf_phdr *elf_phdata;
 316        struct elf_phdr *eppnt;
 317        unsigned long load_addr = 0;
 318        int load_addr_set = 0;
 319        unsigned long last_bss = 0, elf_bss = 0;
 320        unsigned long error = ~0UL;
 321        int retval, i, size;
 322
 323        /* First of all, some simple consistency checks */
 324        if (interp_elf_ex->e_type != ET_EXEC &&
 325            interp_elf_ex->e_type != ET_DYN)
 326                goto out;
 327        if (!elf_check_arch(interp_elf_ex))
 328                goto out;
 329        if (!interpreter->f_op || !interpreter->f_op->mmap)
 330                goto out;
 331
 332        /*
 333         * If the size of this structure has changed, then punt, since
 334         * we will be doing the wrong thing.
 335         */
 336        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 337                goto out;
 338        if (interp_elf_ex->e_phnum < 1 ||
 339                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 340                goto out;
 341
 342        /* Now read in all of the header information */
 343        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 344        if (size > ELF_MIN_ALIGN)
 345                goto out;
 346        elf_phdata = kmalloc(size, GFP_KERNEL);
 347        if (!elf_phdata)
 348                goto out;
 349
 350        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 351                             (char *)elf_phdata,size);
 352        error = -EIO;
 353        if (retval != size) {
 354                if (retval < 0)
 355                        error = retval; 
 356                goto out_close;
 357        }
 358
 359        eppnt = elf_phdata;
 360        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 361                if (eppnt->p_type == PT_LOAD) {
 362                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 363                        int elf_prot = 0;
 364                        unsigned long vaddr = 0;
 365                        unsigned long k, map_addr;
 366
 367                        if (eppnt->p_flags & PF_R)
 368                                elf_prot = PROT_READ;
 369                        if (eppnt->p_flags & PF_W)
 370                                elf_prot |= PROT_WRITE;
 371                        if (eppnt->p_flags & PF_X)
 372                                elf_prot |= PROT_EXEC;
 373                        vaddr = eppnt->p_vaddr;
 374                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 375                                elf_type |= MAP_FIXED;
 376
 377                        map_addr = elf_map(interpreter, load_addr + vaddr,
 378                                           eppnt, elf_prot, elf_type);
 379                        error = map_addr;
 380                        if (BAD_ADDR(map_addr))
 381                                goto out_close;
 382
 383                        if (!load_addr_set &&
 384                            interp_elf_ex->e_type == ET_DYN) {
 385                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 386                                load_addr_set = 1;
 387                        }
 388
 389                        /*
 390                         * Check to see if the section's size will overflow the
 391                         * allowed task size. Note that p_filesz must always be
 392                         * <= p_memsize so it's only necessary to check p_memsz.
 393                         */
 394                        k = load_addr + eppnt->p_vaddr;
 395                        if (BAD_ADDR(k) ||
 396                            eppnt->p_filesz > eppnt->p_memsz ||
 397                            eppnt->p_memsz > TASK_SIZE ||
 398                            TASK_SIZE - eppnt->p_memsz < k) {
 399                                error = -ENOMEM;
 400                                goto out_close;
 401                        }
 402
 403                        /*
 404                         * Find the end of the file mapping for this phdr, and
 405                         * keep track of the largest address we see for this.
 406                         */
 407                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 408                        if (k > elf_bss)
 409                                elf_bss = k;
 410
 411                        /*
 412                         * Do the same thing for the memory mapping - between
 413                         * elf_bss and last_bss is the bss section.
 414                         */
 415                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 416                        if (k > last_bss)
 417                                last_bss = k;
 418                }
 419        }
 420
 421        /*
 422         * Now fill out the bss section.  First pad the last page up
 423         * to the page boundary, and then perform a mmap to make sure
 424         * that there are zero-mapped pages up to and including the 
 425         * last bss page.
 426         */
 427        if (padzero(elf_bss)) {
 428                error = -EFAULT;
 429                goto out_close;
 430        }
 431
 432        /* What we have mapped so far */
 433        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 434
 435        /* Map the last of the bss segment */
 436        if (last_bss > elf_bss) {
 437                down_write(&current->mm->mmap_sem);
 438                error = do_brk(elf_bss, last_bss - elf_bss);
 439                up_write(&current->mm->mmap_sem);
 440                if (BAD_ADDR(error))
 441                        goto out_close;
 442        }
 443
 444        *interp_load_addr = load_addr;
 445        error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
 446
 447out_close:
 448        kfree(elf_phdata);
 449out:
 450        return error;
 451}
 452
 453static unsigned long load_aout_interp(struct exec *interp_ex,
 454                struct file *interpreter)
 455{
 456        unsigned long text_data, elf_entry = ~0UL;
 457        char __user * addr;
 458        loff_t offset;
 459
 460        current->mm->end_code = interp_ex->a_text;
 461        text_data = interp_ex->a_text + interp_ex->a_data;
 462        current->mm->end_data = text_data;
 463        current->mm->brk = interp_ex->a_bss + text_data;
 464
 465        switch (N_MAGIC(*interp_ex)) {
 466        case OMAGIC:
 467                offset = 32;
 468                addr = (char __user *)0;
 469                break;
 470        case ZMAGIC:
 471        case QMAGIC:
 472                offset = N_TXTOFF(*interp_ex);
 473                addr = (char __user *)N_TXTADDR(*interp_ex);
 474                break;
 475        default:
 476                goto out;
 477        }
 478
 479        down_write(&current->mm->mmap_sem);     
 480        do_brk(0, text_data);
 481        up_write(&current->mm->mmap_sem);
 482        if (!interpreter->f_op || !interpreter->f_op->read)
 483                goto out;
 484        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 485                goto out;
 486        flush_icache_range((unsigned long)addr,
 487                           (unsigned long)addr + text_data);
 488
 489        down_write(&current->mm->mmap_sem);     
 490        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 491                interp_ex->a_bss);
 492        up_write(&current->mm->mmap_sem);
 493        elf_entry = interp_ex->a_entry;
 494
 495out:
 496        return elf_entry;
 497}
 498
 499/*
 500 * These are the functions used to load ELF style executables and shared
 501 * libraries.  There is no binary dependent code anywhere else.
 502 */
 503
 504#define INTERPRETER_NONE 0
 505#define INTERPRETER_AOUT 1
 506#define INTERPRETER_ELF 2
 507
 508#ifndef STACK_RND_MASK
 509#define STACK_RND_MASK 0x7ff            /* with 4K pages 8MB of VA */
 510#endif
 511
 512static unsigned long randomize_stack_top(unsigned long stack_top)
 513{
 514        unsigned int random_variable = 0;
 515
 516        if ((current->flags & PF_RANDOMIZE) &&
 517                !(current->personality & ADDR_NO_RANDOMIZE)) {
 518                random_variable = get_random_int() & STACK_RND_MASK;
 519                random_variable <<= PAGE_SHIFT;
 520        }
 521#ifdef CONFIG_STACK_GROWSUP
 522        return PAGE_ALIGN(stack_top) + random_variable;
 523#else
 524        return PAGE_ALIGN(stack_top) - random_variable;
 525#endif
 526}
 527
 528static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 529{
 530        struct file *interpreter = NULL; /* to shut gcc up */
 531        unsigned long load_addr = 0, load_bias = 0;
 532        int load_addr_set = 0;
 533        char * elf_interpreter = NULL;
 534        unsigned int interpreter_type = INTERPRETER_NONE;
 535        unsigned char ibcs2_interpreter = 0;
 536        unsigned long error;
 537        struct elf_phdr *elf_ppnt, *elf_phdata;
 538        unsigned long elf_bss, elf_brk;
 539        int elf_exec_fileno;
 540        int retval, i;
 541        unsigned int size;
 542        unsigned long elf_entry, interp_load_addr = 0;
 543        unsigned long start_code, end_code, start_data, end_data;
 544        unsigned long reloc_func_desc = 0;
 545        char passed_fileno[6];
 546        struct files_struct *files;
 547        int executable_stack = EXSTACK_DEFAULT;
 548        unsigned long def_flags = 0;
 549        struct {
 550                struct elfhdr elf_ex;
 551                struct elfhdr interp_elf_ex;
 552                struct exec interp_ex;
 553        } *loc;
 554
 555        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 556        if (!loc) {
 557                retval = -ENOMEM;
 558                goto out_ret;
 559        }
 560        
 561        /* Get the exec-header */
 562        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 563
 564        retval = -ENOEXEC;
 565        /* First of all, some simple consistency checks */
 566        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 567                goto out;
 568
 569        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 570                goto out;
 571        if (!elf_check_arch(&loc->elf_ex))
 572                goto out;
 573        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 574                goto out;
 575
 576        /* Now read in all of the header information */
 577        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 578                goto out;
 579        if (loc->elf_ex.e_phnum < 1 ||
 580                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 581                goto out;
 582        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 583        retval = -ENOMEM;
 584        elf_phdata = kmalloc(size, GFP_KERNEL);
 585        if (!elf_phdata)
 586                goto out;
 587
 588        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 589                             (char *)elf_phdata, size);
 590        if (retval != size) {
 591                if (retval >= 0)
 592                        retval = -EIO;
 593                goto out_free_ph;
 594        }
 595
 596        files = current->files; /* Refcounted so ok */
 597        retval = unshare_files();
 598        if (retval < 0)
 599                goto out_free_ph;
 600        if (files == current->files) {
 601                put_files_struct(files);
 602                files = NULL;
 603        }
 604
 605        /* exec will make our files private anyway, but for the a.out
 606           loader stuff we need to do it earlier */
 607        retval = get_unused_fd();
 608        if (retval < 0)
 609                goto out_free_fh;
 610        get_file(bprm->file);
 611        fd_install(elf_exec_fileno = retval, bprm->file);
 612
 613        elf_ppnt = elf_phdata;
 614        elf_bss = 0;
 615        elf_brk = 0;
 616
 617        start_code = ~0UL;
 618        end_code = 0;
 619        start_data = 0;
 620        end_data = 0;
 621
 622        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 623                if (elf_ppnt->p_type == PT_INTERP) {
 624                        /* This is the program interpreter used for
 625                         * shared libraries - for now assume that this
 626                         * is an a.out format binary
 627                         */
 628                        retval = -ENOEXEC;
 629                        if (elf_ppnt->p_filesz > PATH_MAX || 
 630                            elf_ppnt->p_filesz < 2)
 631                                goto out_free_file;
 632
 633                        retval = -ENOMEM;
 634                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 635                                                  GFP_KERNEL);
 636                        if (!elf_interpreter)
 637                                goto out_free_file;
 638
 639                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 640                                             elf_interpreter,
 641                                             elf_ppnt->p_filesz);
 642                        if (retval != elf_ppnt->p_filesz) {
 643                                if (retval >= 0)
 644                                        retval = -EIO;
 645                                goto out_free_interp;
 646                        }
 647                        /* make sure path is NULL terminated */
 648                        retval = -ENOEXEC;
 649                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 650                                goto out_free_interp;
 651
 652                        /* If the program interpreter is one of these two,
 653                         * then assume an iBCS2 image. Otherwise assume
 654                         * a native linux image.
 655                         */
 656                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 657                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 658                                ibcs2_interpreter = 1;
 659
 660                        /*
 661                         * The early SET_PERSONALITY here is so that the lookup
 662                         * for the interpreter happens in the namespace of the 
 663                         * to-be-execed image.  SET_PERSONALITY can select an
 664                         * alternate root.
 665                         *
 666                         * However, SET_PERSONALITY is NOT allowed to switch
 667                         * this task into the new images's memory mapping
 668                         * policy - that is, TASK_SIZE must still evaluate to
 669                         * that which is appropriate to the execing application.
 670                         * This is because exit_mmap() needs to have TASK_SIZE
 671                         * evaluate to the size of the old image.
 672                         *
 673                         * So if (say) a 64-bit application is execing a 32-bit
 674                         * application it is the architecture's responsibility
 675                         * to defer changing the value of TASK_SIZE until the
 676                         * switch really is going to happen - do this in
 677                         * flush_thread().      - akpm
 678                         */
 679                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 680
 681                        interpreter = open_exec(elf_interpreter);
 682                        retval = PTR_ERR(interpreter);
 683                        if (IS_ERR(interpreter))
 684                                goto out_free_interp;
 685
 686                        /*
 687                         * If the binary is not readable then enforce
 688                         * mm->dumpable = 0 regardless of the interpreter's
 689                         * permissions.
 690                         */
 691                        if (file_permission(interpreter, MAY_READ) < 0)
 692                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 693
 694                        retval = kernel_read(interpreter, 0, bprm->buf,
 695                                             BINPRM_BUF_SIZE);
 696                        if (retval != BINPRM_BUF_SIZE) {
 697                                if (retval >= 0)
 698                                        retval = -EIO;
 699                                goto out_free_dentry;
 700                        }
 701
 702                        /* Get the exec headers */
 703                        loc->interp_ex = *((struct exec *)bprm->buf);
 704                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 705                        break;
 706                }
 707                elf_ppnt++;
 708        }
 709
 710        elf_ppnt = elf_phdata;
 711        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 712                if (elf_ppnt->p_type == PT_GNU_STACK) {
 713                        if (elf_ppnt->p_flags & PF_X)
 714                                executable_stack = EXSTACK_ENABLE_X;
 715                        else
 716                                executable_stack = EXSTACK_DISABLE_X;
 717                        break;
 718                }
 719
 720        /* Some simple consistency checks for the interpreter */
 721        if (elf_interpreter) {
 722                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 723
 724                /* Now figure out which format our binary is */
 725                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 726                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 727                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 728                        interpreter_type = INTERPRETER_ELF;
 729
 730                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 731                        interpreter_type &= ~INTERPRETER_ELF;
 732
 733                retval = -ELIBBAD;
 734                if (!interpreter_type)
 735                        goto out_free_dentry;
 736
 737                /* Make sure only one type was selected */
 738                if ((interpreter_type & INTERPRETER_ELF) &&
 739                     interpreter_type != INTERPRETER_ELF) {
 740                        // FIXME - ratelimit this before re-enabling
 741                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 742                        interpreter_type = INTERPRETER_ELF;
 743                }
 744                /* Verify the interpreter has a valid arch */
 745                if ((interpreter_type == INTERPRETER_ELF) &&
 746                    !elf_check_arch(&loc->interp_elf_ex))
 747                        goto out_free_dentry;
 748        } else {
 749                /* Executables without an interpreter also need a personality  */
 750                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 751        }
 752
 753        /* OK, we are done with that, now set up the arg stuff,
 754           and then start this sucker up */
 755        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 756                char *passed_p = passed_fileno;
 757                sprintf(passed_fileno, "%d", elf_exec_fileno);
 758
 759                if (elf_interpreter) {
 760                        retval = copy_strings_kernel(1, &passed_p, bprm);
 761                        if (retval)
 762                                goto out_free_dentry; 
 763                        bprm->argc++;
 764                }
 765        }
 766
 767        /* Flush all traces of the currently running executable */
 768        retval = flush_old_exec(bprm);
 769        if (retval)
 770                goto out_free_dentry;
 771
 772        /* Discard our unneeded old files struct */
 773        if (files) {
 774                put_files_struct(files);
 775                files = NULL;
 776        }
 777
 778        /* OK, This is the point of no return */
 779        current->mm->start_data = 0;
 780        current->mm->end_data = 0;
 781        current->mm->end_code = 0;
 782        current->mm->mmap = NULL;
 783        current->flags &= ~PF_FORKNOEXEC;
 784        current->mm->def_flags = def_flags;
 785
 786        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 787           may depend on the personality.  */
 788        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 789        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 790                current->personality |= READ_IMPLIES_EXEC;
 791
 792        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 793                current->flags |= PF_RANDOMIZE;
 794        arch_pick_mmap_layout(current->mm);
 795
 796        /* Do this so that we can load the interpreter, if need be.  We will
 797           change some of these later */
 798        current->mm->free_area_cache = current->mm->mmap_base;
 799        current->mm->cached_hole_size = 0;
 800        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 801                                 executable_stack);
 802        if (retval < 0) {
 803                send_sig(SIGKILL, current, 0);
 804                goto out_free_dentry;
 805        }
 806        
 807        current->mm->start_stack = bprm->p;
 808
 809        /* Now we do a little grungy work by mmaping the ELF image into
 810           the correct location in memory.  At this point, we assume that
 811           the image should be loaded at fixed address, not at a variable
 812           address. */
 813        for(i = 0, elf_ppnt = elf_phdata;
 814            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 815                int elf_prot = 0, elf_flags;
 816                unsigned long k, vaddr;
 817
 818                if (elf_ppnt->p_type != PT_LOAD)
 819                        continue;
 820
 821                if (unlikely (elf_brk > elf_bss)) {
 822                        unsigned long nbyte;
 823                    
 824                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 825                           before this one. Map anonymous pages, if needed,
 826                           and clear the area.  */
 827                        retval = set_brk (elf_bss + load_bias,
 828                                          elf_brk + load_bias);
 829                        if (retval) {
 830                                send_sig(SIGKILL, current, 0);
 831                                goto out_free_dentry;
 832                        }
 833                        nbyte = ELF_PAGEOFFSET(elf_bss);
 834                        if (nbyte) {
 835                                nbyte = ELF_MIN_ALIGN - nbyte;
 836                                if (nbyte > elf_brk - elf_bss)
 837                                        nbyte = elf_brk - elf_bss;
 838                                if (clear_user((void __user *)elf_bss +
 839                                                        load_bias, nbyte)) {
 840                                        /*
 841                                         * This bss-zeroing can fail if the ELF
 842                                         * file specifies odd protections. So
 843                                         * we don't check the return value
 844                                         */
 845                                }
 846                        }
 847                }
 848
 849                if (elf_ppnt->p_flags & PF_R)
 850                        elf_prot |= PROT_READ;
 851                if (elf_ppnt->p_flags & PF_W)
 852                        elf_prot |= PROT_WRITE;
 853                if (elf_ppnt->p_flags & PF_X)
 854                        elf_prot |= PROT_EXEC;
 855
 856                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 857
 858                vaddr = elf_ppnt->p_vaddr;
 859                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 860                        elf_flags |= MAP_FIXED;
 861                } else if (loc->elf_ex.e_type == ET_DYN) {
 862                        /* Try and get dynamic programs out of the way of the
 863                         * default mmap base, as well as whatever program they
 864                         * might try to exec.  This is because the brk will
 865                         * follow the loader, and is not movable.  */
 866                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 867                }
 868
 869                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 870                                elf_prot, elf_flags);
 871                if (BAD_ADDR(error)) {
 872                        send_sig(SIGKILL, current, 0);
 873                        goto out_free_dentry;
 874                }
 875
 876                if (!load_addr_set) {
 877                        load_addr_set = 1;
 878                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 879                        if (loc->elf_ex.e_type == ET_DYN) {
 880                                load_bias += error -
 881                                             ELF_PAGESTART(load_bias + vaddr);
 882                                load_addr += load_bias;
 883                                reloc_func_desc = load_bias;
 884                        }
 885                }
 886                k = elf_ppnt->p_vaddr;
 887                if (k < start_code)
 888                        start_code = k;
 889                if (start_data < k)
 890                        start_data = k;
 891
 892                /*
 893                 * Check to see if the section's size will overflow the
 894                 * allowed task size. Note that p_filesz must always be
 895                 * <= p_memsz so it is only necessary to check p_memsz.
 896                 */
 897                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 898                    elf_ppnt->p_memsz > TASK_SIZE ||
 899                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 900                        /* set_brk can never work. Avoid overflows. */
 901                        send_sig(SIGKILL, current, 0);
 902                        goto out_free_dentry;
 903                }
 904
 905                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 906
 907                if (k > elf_bss)
 908                        elf_bss = k;
 909                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 910                        end_code = k;
 911                if (end_data < k)
 912                        end_data = k;
 913                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 914                if (k > elf_brk)
 915                        elf_brk = k;
 916        }
 917
 918        loc->elf_ex.e_entry += load_bias;
 919        elf_bss += load_bias;
 920        elf_brk += load_bias;
 921        start_code += load_bias;
 922        end_code += load_bias;
 923        start_data += load_bias;
 924        end_data += load_bias;
 925
 926        /* Calling set_brk effectively mmaps the pages that we need
 927         * for the bss and break sections.  We must do this before
 928         * mapping in the interpreter, to make sure it doesn't wind
 929         * up getting placed where the bss needs to go.
 930         */
 931        retval = set_brk(elf_bss, elf_brk);
 932        if (retval) {
 933                send_sig(SIGKILL, current, 0);
 934                goto out_free_dentry;
 935        }
 936        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 937                send_sig(SIGSEGV, current, 0);
 938                retval = -EFAULT; /* Nobody gets to see this, but.. */
 939                goto out_free_dentry;
 940        }
 941
 942        if (elf_interpreter) {
 943                if (interpreter_type == INTERPRETER_AOUT)
 944                        elf_entry = load_aout_interp(&loc->interp_ex,
 945                                                     interpreter);
 946                else
 947                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
 948                                                    interpreter,
 949                                                    &interp_load_addr);
 950                if (BAD_ADDR(elf_entry)) {
 951                        force_sig(SIGSEGV, current);
 952                        retval = IS_ERR((void *)elf_entry) ?
 953                                        (int)elf_entry : -EINVAL;
 954                        goto out_free_dentry;
 955                }
 956                reloc_func_desc = interp_load_addr;
 957
 958                allow_write_access(interpreter);
 959                fput(interpreter);
 960                kfree(elf_interpreter);
 961        } else {
 962                elf_entry = loc->elf_ex.e_entry;
 963                if (BAD_ADDR(elf_entry)) {
 964                        force_sig(SIGSEGV, current);
 965                        retval = -EINVAL;
 966                        goto out_free_dentry;
 967                }
 968        }
 969
 970        kfree(elf_phdata);
 971
 972        if (interpreter_type != INTERPRETER_AOUT)
 973                sys_close(elf_exec_fileno);
 974
 975        set_binfmt(&elf_format);
 976
 977#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 978        retval = arch_setup_additional_pages(bprm, executable_stack);
 979        if (retval < 0) {
 980                send_sig(SIGKILL, current, 0);
 981                goto out;
 982        }
 983#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 984
 985        compute_creds(bprm);
 986        current->flags &= ~PF_FORKNOEXEC;
 987        create_elf_tables(bprm, &loc->elf_ex,
 988                          (interpreter_type == INTERPRETER_AOUT),
 989                          load_addr, interp_load_addr);
 990        /* N.B. passed_fileno might not be initialized? */
 991        if (interpreter_type == INTERPRETER_AOUT)
 992                current->mm->arg_start += strlen(passed_fileno) + 1;
 993        current->mm->end_code = end_code;
 994        current->mm->start_code = start_code;
 995        current->mm->start_data = start_data;
 996        current->mm->end_data = end_data;
 997        current->mm->start_stack = bprm->p;
 998
 999        if (current->personality & MMAP_PAGE_ZERO) {
1000                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1001                   and some applications "depend" upon this behavior.
1002                   Since we do not have the power to recompile these, we
1003                   emulate the SVr4 behavior. Sigh. */
1004                down_write(&current->mm->mmap_sem);
1005                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1006                                MAP_FIXED | MAP_PRIVATE, 0);
1007                up_write(&current->mm->mmap_sem);
1008        }
1009
1010#ifdef ELF_PLAT_INIT
1011        /*
1012         * The ABI may specify that certain registers be set up in special
1013         * ways (on i386 %edx is the address of a DT_FINI function, for
1014         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1015         * that the e_entry field is the address of the function descriptor
1016         * for the startup routine, rather than the address of the startup
1017         * routine itself.  This macro performs whatever initialization to
1018         * the regs structure is required as well as any relocations to the
1019         * function descriptor entries when executing dynamically links apps.
1020         */
1021        ELF_PLAT_INIT(regs, reloc_func_desc);
1022#endif
1023
1024        start_thread(regs, elf_entry, bprm->p);
1025        if (unlikely(current->ptrace & PT_PTRACED)) {
1026                if (current->ptrace & PT_TRACE_EXEC)
1027                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1028                else
1029                        send_sig(SIGTRAP, current, 0);
1030        }
1031        retval = 0;
1032out:
1033        kfree(loc);
1034out_ret:
1035        return retval;
1036
1037        /* error cleanup */
1038out_free_dentry:
1039        allow_write_access(interpreter);
1040        if (interpreter)
1041                fput(interpreter);
1042out_free_interp:
1043        kfree(elf_interpreter);
1044out_free_file:
1045        sys_close(elf_exec_fileno);
1046out_free_fh:
1047        if (files)
1048                reset_files_struct(current, files);
1049out_free_ph:
1050        kfree(elf_phdata);
1051        goto out;
1052}
1053
1054/* This is really simpleminded and specialized - we are loading an
1055   a.out library that is given an ELF header. */
1056static int load_elf_library(struct file *file)
1057{
1058        struct elf_phdr *elf_phdata;
1059        struct elf_phdr *eppnt;
1060        unsigned long elf_bss, bss, len;
1061        int retval, error, i, j;
1062        struct elfhdr elf_ex;
1063
1064        error = -ENOEXEC;
1065        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1066        if (retval != sizeof(elf_ex))
1067                goto out;
1068
1069        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1070                goto out;
1071
1072        /* First of all, some simple consistency checks */
1073        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1074            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1075                goto out;
1076
1077        /* Now read in all of the header information */
1078
1079        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1080        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1081
1082        error = -ENOMEM;
1083        elf_phdata = kmalloc(j, GFP_KERNEL);
1084        if (!elf_phdata)
1085                goto out;
1086
1087        eppnt = elf_phdata;
1088        error = -ENOEXEC;
1089        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1090        if (retval != j)
1091                goto out_free_ph;
1092
1093        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1094                if ((eppnt + i)->p_type == PT_LOAD)
1095                        j++;
1096        if (j != 1)
1097                goto out_free_ph;
1098
1099        while (eppnt->p_type != PT_LOAD)
1100                eppnt++;
1101
1102        /* Now use mmap to map the library into memory. */
1103        down_write(&current->mm->mmap_sem);
1104        error = do_mmap(file,
1105                        ELF_PAGESTART(eppnt->p_vaddr),
1106                        (eppnt->p_filesz +
1107                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1108                        PROT_READ | PROT_WRITE | PROT_EXEC,
1109                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1110                        (eppnt->p_offset -
1111                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1112        up_write(&current->mm->mmap_sem);
1113        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1114                goto out_free_ph;
1115
1116        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1117        if (padzero(elf_bss)) {
1118                error = -EFAULT;
1119                goto out_free_ph;
1120        }
1121
1122        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1123                            ELF_MIN_ALIGN - 1);
1124        bss = eppnt->p_memsz + eppnt->p_vaddr;
1125        if (bss > len) {
1126                down_write(&current->mm->mmap_sem);
1127                do_brk(len, bss - len);
1128                up_write(&current->mm->mmap_sem);
1129        }
1130        error = 0;
1131
1132out_free_ph:
1133        kfree(elf_phdata);
1134out:
1135        return error;
1136}
1137
1138/*
1139 * Note that some platforms still use traditional core dumps and not
1140 * the ELF core dump.  Each platform can select it as appropriate.
1141 */
1142#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1143
1144/*
1145 * ELF core dumper
1146 *
1147 * Modelled on fs/exec.c:aout_core_dump()
1148 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1149 */
1150/*
1151 * These are the only things you should do on a core-file: use only these
1152 * functions to write out all the necessary info.
1153 */
1154static int dump_write(struct file *file, const void *addr, int nr)
1155{
1156        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1157}
1158
1159static int dump_seek(struct file *file, loff_t off)
1160{
1161        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1162                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1163                        return 0;
1164        } else {
1165                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1166                if (!buf)
1167                        return 0;
1168                while (off > 0) {
1169                        unsigned long n = off;
1170                        if (n > PAGE_SIZE)
1171                                n = PAGE_SIZE;
1172                        if (!dump_write(file, buf, n))
1173                                return 0;
1174                        off -= n;
1175                }
1176                free_page((unsigned long)buf);
1177        }
1178        return 1;
1179}
1180
1181/*
1182 * Decide whether a segment is worth dumping; default is yes to be
1183 * sure (missing info is worse than too much; etc).
1184 * Personally I'd include everything, and use the coredump limit...
1185 *
1186 * I think we should skip something. But I am not sure how. H.J.
1187 */
1188static int maydump(struct vm_area_struct *vma)
1189{
1190        /* The vma can be set up to tell us the answer directly.  */
1191        if (vma->vm_flags & VM_ALWAYSDUMP)
1192                return 1;
1193
1194        /* Do not dump I/O mapped devices or special mappings */
1195        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1196                return 0;
1197
1198        /* Dump shared memory only if mapped from an anonymous file. */
1199        if (vma->vm_flags & VM_SHARED)
1200                return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1201
1202        /* If it hasn't been written to, don't write it out */
1203        if (!vma->anon_vma)
1204                return 0;
1205
1206        return 1;
1207}
1208
/* An ELF note held in memory until it is written out to the core file. */
struct memelfnote {
        const char *name;       /* NUL-terminated note name */
        int type;               /* note type, becomes n_type on disk */
        unsigned int datasz;    /* payload size in bytes, becomes n_descsz */
        void *data;             /* payload, datasz bytes */
};
1217
1218static int notesize(struct memelfnote *en)
1219{
1220        int sz;
1221
1222        sz = sizeof(struct elf_note);
1223        sz += roundup(strlen(en->name) + 1, 4);
1224        sz += roundup(en->datasz, 4);
1225
1226        return sz;
1227}
1228
/*
 * Write `nr' bytes from `addr' to the `file' variable of the enclosing
 * function and add `nr' to the offset counter `foffset' points to.  A
 * failed write makes the enclosing function return 0.
 *
 * `nr' is evaluated twice, so it must be free of side effects.
 */
#define DUMP_WRITE(addr, nr, foffset)   \
        do { \
                if (!dump_write(file, (addr), (nr))) \
                        return 0; \
                *(foffset) += (nr); \
        } while(0)
1231
1232static int alignfile(struct file *file, loff_t *foffset)
1233{
1234        static const char buf[4] = { 0, };
1235        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1236        return 1;
1237}
1238
1239static int writenote(struct memelfnote *men, struct file *file,
1240                        loff_t *foffset)
1241{
1242        struct elf_note en;
1243        en.n_namesz = strlen(men->name) + 1;
1244        en.n_descsz = men->datasz;
1245        en.n_type = men->type;
1246
1247        DUMP_WRITE(&en, sizeof(en), foffset);
1248        DUMP_WRITE(men->name, en.n_namesz, foffset);
1249        if (!alignfile(file, foffset))
1250                return 0;
1251        DUMP_WRITE(men->data, men->datasz, foffset);
1252        if (!alignfile(file, foffset))
1253                return 0;
1254
1255        return 1;
1256}
#undef DUMP_WRITE

/*
 * Output helpers for the core dumper.  Both rely on names from the
 * enclosing function: the `file' being written and an `end_coredump'
 * label; DUMP_WRITE additionally uses `size' and `limit'.
 *
 * DUMP_WRITE adds `nr' to `size' and jumps to end_coredump if that exceeds
 * `limit' or if the write fails.  `nr' is evaluated twice.
 * DUMP_SEEK jumps to end_coredump if dump_seek() fails.
 *
 * The do { } while (0) wrapper makes each macro a single statement, so it
 * is safe in an unbraced if/else.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if ((size += (nr)) > limit || \
                    !dump_write(file, (addr), (nr))) \
                        goto end_coredump; \
        } while (0)
#define DUMP_SEEK(off)  \
        do { \
                if (!dump_seek(file, (off))) \
                        goto end_coredump; \
        } while (0)
1265
1266static void fill_elf_header(struct elfhdr *elf, int segs)
1267{
1268        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1269        elf->e_ident[EI_CLASS] = ELF_CLASS;
1270        elf->e_ident[EI_DATA] = ELF_DATA;
1271        elf->e_ident[EI_VERSION] = EV_CURRENT;
1272        elf->e_ident[EI_OSABI] = ELF_OSABI;
1273        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1274
1275        elf->e_type = ET_CORE;
1276        elf->e_machine = ELF_ARCH;
1277        elf->e_version = EV_CURRENT;
1278        elf->e_entry = 0;
1279        elf->e_phoff = sizeof(struct elfhdr);
1280        elf->e_shoff = 0;
1281        elf->e_flags = ELF_CORE_EFLAGS;
1282        elf->e_ehsize = sizeof(struct elfhdr);
1283        elf->e_phentsize = sizeof(struct elf_phdr);
1284        elf->e_phnum = segs;
1285        elf->e_shentsize = 0;
1286        elf->e_shnum = 0;
1287        elf->e_shstrndx = 0;
1288        return;
1289}
1290
1291static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1292{
1293        phdr->p_type = PT_NOTE;
1294        phdr->p_offset = offset;
1295        phdr->p_vaddr = 0;
1296        phdr->p_paddr = 0;
1297        phdr->p_filesz = sz;
1298        phdr->p_memsz = 0;
1299        phdr->p_flags = 0;
1300        phdr->p_align = 0;
1301        return;
1302}
1303
1304static void fill_note(struct memelfnote *note, const char *name, int type, 
1305                unsigned int sz, void *data)
1306{
1307        note->name = name;
1308        note->type = type;
1309        note->datasz = sz;
1310        note->data = data;
1311        return;
1312}
1313
1314/*
1315 * fill up all the fields in prstatus from the given task struct, except
1316 * registers which need to be filled up separately.
1317 */
1318static void fill_prstatus(struct elf_prstatus *prstatus,
1319                struct task_struct *p, long signr)
1320{
1321        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1322        prstatus->pr_sigpend = p->pending.signal.sig[0];
1323        prstatus->pr_sighold = p->blocked.sig[0];
1324        prstatus->pr_pid = p->pid;
1325        prstatus->pr_ppid = p->parent->pid;
1326        prstatus->pr_pgrp = process_group(p);
1327        prstatus->pr_sid = process_session(p);
1328        if (thread_group_leader(p)) {
1329                /*
1330                 * This is the record for the group leader.  Add in the
1331                 * cumulative times of previous dead threads.  This total
1332                 * won't include the time of each live thread whose state
1333                 * is included in the core dump.  The final total reported
1334                 * to our parent process when it calls wait4 will include
1335                 * those sums as well as the little bit more time it takes
1336                 * this and each other thread to finish dying after the
1337                 * core dump synchronization phase.
1338                 */
1339                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1340                                   &prstatus->pr_utime);
1341                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1342                                   &prstatus->pr_stime);
1343        } else {
1344                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1345                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1346        }
1347        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1348        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1349}
1350
1351static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1352                       struct mm_struct *mm)
1353{
1354        unsigned int i, len;
1355        
1356        /* first copy the parameters from user space */
1357        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1358
1359        len = mm->arg_end - mm->arg_start;
1360        if (len >= ELF_PRARGSZ)
1361                len = ELF_PRARGSZ-1;
1362        if (copy_from_user(&psinfo->pr_psargs,
1363                           (const char __user *)mm->arg_start, len))
1364                return -EFAULT;
1365        for(i = 0; i < len; i++)
1366                if (psinfo->pr_psargs[i] == 0)
1367                        psinfo->pr_psargs[i] = ' ';
1368        psinfo->pr_psargs[len] = 0;
1369
1370        psinfo->pr_pid = p->pid;
1371        psinfo->pr_ppid = p->parent->pid;
1372        psinfo->pr_pgrp = process_group(p);
1373        psinfo->pr_sid = process_session(p);
1374
1375        i = p->state ? ffz(~p->state) + 1 : 0;
1376        psinfo->pr_state = i;
1377        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1378        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1379        psinfo->pr_nice = task_nice(p);
1380        psinfo->pr_flag = p->flags;
1381        SET_UID(psinfo->pr_uid, p->uid);
1382        SET_GID(psinfo->pr_gid, p->gid);
1383        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1384        
1385        return 0;
1386}
1387
1388/* Here is the structure in which status of each thread is captured. */
1389struct elf_thread_status
1390{
1391        struct list_head list;
1392        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1393        elf_fpregset_t fpu;             /* NT_PRFPREG */
1394        struct task_struct *thread;
1395#ifdef ELF_CORE_COPY_XFPREGS
1396        elf_fpxregset_t xfpu;           /* NT_PRXFPREG */
1397#endif
1398        struct memelfnote notes[3];
1399        int num_notes;
1400};
1401
1402/*
1403 * In order to add the specific thread information for the elf file format,
1404 * we need to keep a linked list of every threads pr_status and then create
1405 * a single section for them in the final core file.
1406 */
1407static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1408{
1409        int sz = 0;
1410        struct task_struct *p = t->thread;
1411        t->num_notes = 0;
1412
1413        fill_prstatus(&t->prstatus, p, signr);
1414        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1415        
1416        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1417                  &(t->prstatus));
1418        t->num_notes++;
1419        sz += notesize(&t->notes[0]);
1420
1421        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1422                                                                &t->fpu))) {
1423                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1424                          &(t->fpu));
1425                t->num_notes++;
1426                sz += notesize(&t->notes[1]);
1427        }
1428
1429#ifdef ELF_CORE_COPY_XFPREGS
1430        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1431                fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1432                          &t->xfpu);
1433                t->num_notes++;
1434                sz += notesize(&t->notes[2]);
1435        }
1436#endif  
1437        return sz;
1438}
1439
1440static struct vm_area_struct *first_vma(struct task_struct *tsk,
1441                                        struct vm_area_struct *gate_vma)
1442{
1443        struct vm_area_struct *ret = tsk->mm->mmap;
1444
1445        if (ret)
1446                return ret;
1447        return gate_vma;
1448}
1449/*
1450 * Helper function for iterating across a vma list.  It ensures that the caller
1451 * will visit `gate_vma' prior to terminating the search.
1452 */
1453static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1454                                        struct vm_area_struct *gate_vma)
1455{
1456        struct vm_area_struct *ret;
1457
1458        ret = this_vma->vm_next;
1459        if (ret)
1460                return ret;
1461        if (this_vma == gate_vma)
1462                return NULL;
1463        return gate_vma;
1464}
1465
1466/*
1467 * Actual dumper
1468 *
1469 * This is a two-pass process; first we find the offsets of the bits,
1470 * and then they are actually written out.  If we run out of core limit
1471 * we just truncate.
1472 */
1473static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1474{
1475#define NUM_NOTES       6
1476        int has_dumped = 0;
1477        mm_segment_t fs;
1478        int segs;
1479        size_t size = 0;
1480        int i;
1481        struct vm_area_struct *vma, *gate_vma;
1482        struct elfhdr *elf = NULL;
1483        loff_t offset = 0, dataoff, foffset;
1484        unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1485        int numnote;
1486        struct memelfnote *notes = NULL;
1487        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1488        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1489        struct task_struct *g, *p;
1490        LIST_HEAD(thread_list);
1491        struct list_head *t;
1492        elf_fpregset_t *fpu = NULL;
1493#ifdef ELF_CORE_COPY_XFPREGS
1494        elf_fpxregset_t *xfpu = NULL;
1495#endif
1496        int thread_status_size = 0;
1497        elf_addr_t *auxv;
1498
1499        /*
1500         * We no longer stop all VM operations.
1501         * 
1502         * This is because those proceses that could possibly change map_count
1503         * or the mmap / vma pages are now blocked in do_exit on current
1504         * finishing this core dump.
1505         *
1506         * Only ptrace can touch these memory addresses, but it doesn't change
1507         * the map_count or the pages allocated. So no possibility of crashing
1508         * exists while dumping the mm->vm_next areas to the core file.
1509         */
1510  
1511        /* alloc memory for large data structures: too large to be on stack */
1512        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1513        if (!elf)
1514                goto cleanup;
1515        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1516        if (!prstatus)
1517                goto cleanup;
1518        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1519        if (!psinfo)
1520                goto cleanup;
1521        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1522        if (!notes)
1523                goto cleanup;
1524        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1525        if (!fpu)
1526                goto cleanup;
1527#ifdef ELF_CORE_COPY_XFPREGS
1528        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1529        if (!xfpu)
1530                goto cleanup;
1531#endif
1532
1533        if (signr) {
1534                struct elf_thread_status *tmp;
1535                rcu_read_lock();
1536                do_each_thread(g,p)
1537                        if (current->mm == p->mm && current != p) {
1538                                tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1539                                if (!tmp) {
1540                                        rcu_read_unlock();
1541                                        goto cleanup;
1542                                }
1543                                tmp->thread = p;
1544                                list_add(&tmp->list, &thread_list);
1545                        }
1546                while_each_thread(g,p);
1547                rcu_read_unlock();
1548                list_for_each(t, &thread_list) {
1549                        struct elf_thread_status *tmp;
1550                        int sz;
1551
1552                        tmp = list_entry(t, struct elf_thread_status, list);
1553                        sz = elf_dump_thread_status(signr, tmp);
1554                        thread_status_size += sz;
1555                }
1556        }
1557        /* now collect the dump for the current */
1558        memset(prstatus, 0, sizeof(*prstatus));
1559        fill_prstatus(prstatus, current, signr);
1560        elf_core_copy_regs(&prstatus->pr_reg, regs);
1561        
1562        segs = current->mm->map_count;
1563#ifdef ELF_CORE_EXTRA_PHDRS
1564        segs += ELF_CORE_EXTRA_PHDRS;
1565#endif
1566
1567        gate_vma = get_gate_vma(current);
1568        if (gate_vma != NULL)
1569                segs++;
1570
1571        /* Set up header */
1572        fill_elf_header(elf, segs + 1); /* including notes section */
1573
1574        has_dumped = 1;
1575        current->flags |= PF_DUMPCORE;
1576
1577        /*
1578         * Set up the notes in similar form to SVR4 core dumps made
1579         * with info from their /proc.
1580         */
1581
1582        fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1583        fill_psinfo(psinfo, current->group_leader, current->mm);
1584        fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1585        
1586        numnote = 2;
1587
1588        auxv = (elf_addr_t *)current->mm->saved_auxv;
1589
1590        i = 0;
1591        do
1592                i += 2;
1593        while (auxv[i - 2] != AT_NULL);
1594        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1595                  i * sizeof(elf_addr_t), auxv);
1596
1597        /* Try to dump the FPU. */
1598        if ((prstatus->pr_fpvalid =
1599             elf_core_copy_task_fpregs(current, regs, fpu)))
1600                fill_note(notes + numnote++,
1601                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1602#ifdef ELF_CORE_COPY_XFPREGS
1603        if (elf_core_copy_task_xfpregs(current, xfpu))
1604                fill_note(notes + numnote++,
1605                          "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1606#endif  
1607  
1608        fs = get_fs();
1609        set_fs(KERNEL_DS);
1610
1611        DUMP_WRITE(elf, sizeof(*elf));
1612        offset += sizeof(*elf);                         /* Elf header */
1613        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1614        foffset = offset;
1615
1616        /* Write notes phdr entry */
1617        {
1618                struct elf_phdr phdr;
1619                int sz = 0;
1620
1621                for (i = 0; i < numnote; i++)
1622                        sz += notesize(notes + i);
1623                
1624                sz += thread_status_size;
1625
1626#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1627                sz += ELF_CORE_EXTRA_NOTES_SIZE;
1628#endif
1629
1630                fill_elf_note_phdr(&phdr, sz, offset);
1631                offset += sz;
1632                DUMP_WRITE(&phdr, sizeof(phdr));
1633        }
1634
1635        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1636
1637        /* Write program headers for segments dump */
1638        for (vma = first_vma(current, gate_vma); vma != NULL;
1639                        vma = next_vma(vma, gate_vma)) {
1640                struct elf_phdr phdr;
1641                size_t sz;
1642
1643                sz = vma->vm_end - vma->vm_start;
1644
1645                phdr.p_type = PT_LOAD;
1646                phdr.p_offset = offset;
1647                phdr.p_vaddr = vma->vm_start;
1648                phdr.p_paddr = 0;
1649                phdr.p_filesz = maydump(vma) ? sz : 0;
1650                phdr.p_memsz = sz;
1651                offset += phdr.p_filesz;
1652                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1653                if (vma->vm_flags & VM_WRITE)
1654                        phdr.p_flags |= PF_W;
1655                if (vma->vm_flags & VM_EXEC)
1656                        phdr.p_flags |= PF_X;
1657                phdr.p_align = ELF_EXEC_PAGESIZE;
1658
1659                DUMP_WRITE(&phdr, sizeof(phdr));
1660        }
1661
1662#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1663        ELF_CORE_WRITE_EXTRA_PHDRS;
1664#endif
1665
1666        /* write out the notes section */
1667        for (i = 0; i < numnote; i++)
1668                if (!writenote(notes + i, file, &foffset))
1669                        goto end_coredump;
1670
1671#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1672        ELF_CORE_WRITE_EXTRA_NOTES;
1673#endif
1674
1675        /* write out the thread status notes section */
1676        list_for_each(t, &thread_list) {
1677                struct elf_thread_status *tmp =
1678                                list_entry(t, struct elf_thread_status, list);
1679
1680                for (i = 0; i < tmp->num_notes; i++)
1681                        if (!writenote(&tmp->notes[i], file, &foffset))
1682                                goto end_coredump;
1683        }
1684
1685        /* Align to page */
1686        DUMP_SEEK(dataoff - foffset);
1687
1688        for (vma = first_vma(current, gate_vma); vma != NULL;
1689                        vma = next_vma(vma, gate_vma)) {
1690                unsigned long addr;
1691
1692                if (!maydump(vma))
1693                        continue;
1694
1695                for (addr = vma->vm_start;
1696                     addr < vma->vm_end;
1697                     addr += PAGE_SIZE) {
1698                        struct page *page;
1699                        struct vm_area_struct *vma;
1700
1701                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1702                                                &page, &vma) <= 0) {
1703                                DUMP_SEEK(PAGE_SIZE);
1704                        } else {
1705                                if (page == ZERO_PAGE(addr)) {
1706                                        DUMP_SEEK(PAGE_SIZE);
1707                                } else {
1708                                        void *kaddr;
1709                                        flush_cache_page(vma, addr,
1710                                                         page_to_pfn(page));
1711                                        kaddr = kmap(page);
1712                                        if ((size += PAGE_SIZE) > limit ||
1713                                            !dump_write(file, kaddr,
1714                                            PAGE_SIZE)) {
1715                                                kunmap(page);
1716                                                page_cache_release(page);
1717                                                goto end_coredump;
1718                                        }
1719                                        kunmap(page);
1720                                }
1721                                page_cache_release(page);
1722                        }
1723                }
1724        }
1725
1726#ifdef ELF_CORE_WRITE_EXTRA_DATA
1727        ELF_CORE_WRITE_EXTRA_DATA;
1728#endif
1729
1730end_coredump:
1731        set_fs(fs);
1732
1733cleanup:
1734        while (!list_empty(&thread_list)) {
1735                struct list_head *tmp = thread_list.next;
1736                list_del(tmp);
1737                kfree(list_entry(tmp, struct elf_thread_status, list));
1738        }
1739
1740        kfree(elf);
1741        kfree(prstatus);
1742        kfree(psinfo);
1743        kfree(notes);
1744        kfree(fpu);
1745#ifdef ELF_CORE_COPY_XFPREGS
1746        kfree(xfpu);
1747#endif
1748        return has_dumped;
1749#undef NUM_NOTES
1750}
1751
1752#endif          /* USE_ELF_CORE_DUMP */
1753
1754static int __init init_elf_binfmt(void)
1755{
1756        return register_binfmt(&elf_format);
1757}
1758
1759static void __exit exit_elf_binfmt(void)
1760{
1761        /* Remove the COFF and ELF loaders. */
1762        unregister_binfmt(&elf_format);
1763}
1764
/*
 * Hook init_elf_binfmt()/exit_elf_binfmt() into the kernel's init and
 * exit machinery and declare the license of this code as "GPL".
 *
 * NOTE(review): core_initcall() is used rather than module_init(),
 * presumably so the ELF handler is registered early during boot -
 * confirm against <linux/init.h>.
 */
1765core_initcall(init_elf_binfmt);
1766module_exit(exit_elf_binfmt);
1767MODULE_LICENSE("GPL");
1768
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.