linux/fs/binfmt_elf.c
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/exec.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
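
/*
 * Worked example, assuming ELF_MIN_ALIGN == 0x1000: for _v == 0x08048123,
 * ELF_PAGESTART(_v) == 0x08048000, ELF_PAGEOFFSET(_v) == 0x123 and
 * ELF_PAGEALIGN(_v) == 0x08049000.
 */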

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

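/*
 * Note: error returns from vm_brk()/vm_mmap() are small negative values,
 * which as unsigned longs lie far above TASK_SIZE, so BAD_ADDR() catches
 * -errno results as well as out-of-range user addresses.
 */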
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  Otherwise they
   would contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
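
/*
 * Example, assuming ELF_MIN_ALIGN == 0x1000: padzero(0x0804a123) clears
 * the 0xedd bytes from 0x0804a123 up to the page boundary at 0x0804b000.
 */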

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
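
/*
 * Usage sketch: on the usual downward-growing stack, STACK_ALLOC(p, 16)
 * moves p down by 16 bytes and evaluates to the new, lower address, so
 * the caller can copy 16 bytes there; with CONFIG_STACK_GROWSUP it
 * returns the old address and moves p up instead.
 */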

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

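/*
 * Sketch of what create_elf_tables() leaves on the new stack, reading
 * upwards from the final stack pointer (classic downward-growing stack):
 *
 *      argc
 *      argv[0] .. argv[argc - 1], NULL
 *      envp[0] .. envp[envc - 1], NULL
 *      auxv pairs (AT_* id, value), terminated by an AT_NULL entry
 *      (higher still: random bytes, platform strings, arg/env strings)
 */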
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image - and then unmap the remainder
         * at the end (that unmapping is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}
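
/*
 * total_mapping_size() below returns the span from the page start of the
 * first PT_LOAD header to the end of the last one. Illustrative case: two
 * LOADs at p_vaddr 0x0 (p_memsz 0x1000) and 0x200000 (p_memsz 0x5000)
 * yield 0x205000, which elf_map() reserves in one go before trimming.
 */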

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

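/*
 * With the default STACK_RND_MASK of 0x7ff and 4K pages, the stack top
 * is shifted by up to 0x7ff pages (0x7ff000 bytes), i.e. randomized
 * within an 8MB window.
 */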
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary.
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

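        /*
         * Illustrative example: an ET_DYN binary whose first PT_LOAD has
         * p_vaddr 0x0 and was mapped at 0x555555554000 ends up with
         * load_bias == 0x555555554000 (a typical x86-64 value), and
         * e_entry, bss and brk below all shift by that bias; for ET_EXEC,
         * load_bias stays 0.
         */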
        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386, %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization of
         * the regs structure is required, as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

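/*
 * The FILTER() checks below test MMF_DUMP_* bits in mm_flags; these are
 * the bits user space controls through /proc/<pid>/coredump_filter.
 */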
/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_NODUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
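
/*
 * On-disk note layout matching notesize() above: a struct elf_note
 * header, then the NUL-terminated name padded to a 4-byte boundary,
 * then the descriptor data, likewise padded to 4 bytes (see
 * writenote()/alignfile() below).
 */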

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * Fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
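        /* i is now just past the AT_NULL pair, so the terminator is included */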
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
        struct elf_thread_core_info *next;
        struct task_struct *task;
        struct elf_prstatus prstatus;
        struct memelfnote notes[0];
};

struct elf_note_info {
        struct elf_thread_core_info *thread;
        struct memelfnote psinfo;
        struct memelfnote auxv;
        size_t size;
        int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
                                       const struct user_regset *regset)
{
        if (regset->writeback)
                regset->writeback(task, regset, 1);
}

#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1420
1421static int fill_thread_core_info(struct elf_thread_core_info *t,
1422                                 const struct user_regset_view *view,
1423                                 long signr, size_t *total)
1424{
1425        unsigned int i;
1426
1427        /*
1428         * NT_PRSTATUS is the one special case, because the regset data
1429         * goes into the pr_reg field inside the note contents, rather
1430         * than being the whole note contents.  We fill the rest in here.
1431         * We assume that regset 0 is NT_PRSTATUS.
1432         */
1433        fill_prstatus(&t->prstatus, t->task, signr);
1434        (void) view->regsets[0].get(t->task, &view->regsets[0],
1435                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1436                                    PR_REG_PTR(&t->prstatus), NULL);
1437
1438        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1439                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1440        *total += notesize(&t->notes[0]);
1441
1442        do_thread_regset_writeback(t->task, &view->regsets[0]);
1443
1444        /*
1445         * Each other regset might generate a note too.  For each regset
1446         * that has no core_note_type or is inactive, we leave t->notes[i]
1447         * all zero and we'll know to skip writing it later.
1448         */
1449        for (i = 1; i < view->n; ++i) {
1450                const struct user_regset *regset = &view->regsets[i];
1451                do_thread_regset_writeback(t->task, regset);
1452                if (regset->core_note_type && regset->get &&
1453                    (!regset->active || regset->active(t->task, regset))) {
1454                        int ret;
1455                        size_t size = regset->n * regset->size;
1456                        void *data = kmalloc(size, GFP_KERNEL);
1457                        if (unlikely(!data))
1458                                return 0;
1459                        ret = regset->get(t->task, regset,
1460                                          0, size, data, NULL);
1461                        if (unlikely(ret))
1462                                kfree(data);
1463                        else {
1464                                if (regset->core_note_type != NT_PRFPREG)
1465                                        fill_note(&t->notes[i], "LINUX",
1466                                                  regset->core_note_type,
1467                                                  size, data);
1468                                else {
1469                                        SET_PR_FPVALID(&t->prstatus, 1);
1470                                        fill_note(&t->notes[i], "CORE",
1471                                                  NT_PRFPREG, size, data);
1472                                }
1473                                *total += notesize(&t->notes[i]);
1474                        }
1475                }
1476        }
1477
1478        return 1;
1479}
1480
1481static int fill_note_info(struct elfhdr *elf, int phdrs,
1482                          struct elf_note_info *info,
1483                          long signr, struct pt_regs *regs)
1484{
1485        struct task_struct *dump_task = current;
1486        const struct user_regset_view *view = task_user_regset_view(dump_task);
1487        struct elf_thread_core_info *t;
1488        struct elf_prpsinfo *psinfo;
1489        struct core_thread *ct;
1490        unsigned int i;
1491
1492        info->size = 0;
1493        info->thread = NULL;
1494
1495        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1496        if (psinfo == NULL)
1497                return 0;
1498
1499        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1500
1501        /*
1502         * Figure out how many notes we're going to need for each thread.
1503         */
1504        info->thread_notes = 0;
1505        for (i = 0; i < view->n; ++i)
1506                if (view->regsets[i].core_note_type != 0)
1507                        ++info->thread_notes;
1508
1509        /*
1510         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1511         * since it is our one special case.
1512         */
1513        if (unlikely(info->thread_notes == 0) ||
1514            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1515                WARN_ON(1);
1516                return 0;
1517        }
1518
1519        /*
1520         * Initialize the ELF file header.
1521         */
1522        fill_elf_header(elf, phdrs,
1523                        view->e_machine, view->e_flags, view->ei_osabi);
1524
1525        /*
1526         * Allocate a structure for each thread.
1527         */
1528        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1529                t = kzalloc(offsetof(struct elf_thread_core_info,
1530                                     notes[info->thread_notes]),
1531                            GFP_KERNEL);
1532                if (unlikely(!t))
1533                        return 0;
1534
1535                t->task = ct->task;
1536                if (ct->task == dump_task || !info->thread) {
1537                        t->next = info->thread;
1538                        info->thread = t;
1539                } else {
1540                        /*
1541                         * Make sure to keep the original task at
1542                         * the head of the list.
1543                         */
1544                        t->next = info->thread->next;
1545                        info->thread->next = t;
1546                }
1547        }
1548
1549        /*
1550         * Now fill in each thread's information.
1551         */
1552        for (t = info->thread; t != NULL; t = t->next)
1553                if (!fill_thread_core_info(t, view, signr, &info->size))
1554                        return 0;
1555
1556        /*
1557         * Fill in the two process-wide notes.
1558         */
1559        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1560        info->size += notesize(&info->psinfo);
1561
1562        fill_auxv_note(&info->auxv, current->mm);
1563        info->size += notesize(&info->auxv);
1564
1565        return 1;
1566}
1567
1568static size_t get_note_info_size(struct elf_note_info *info)
1569{
1570        return info->size;
1571}
1572
1573/*
1574 * Write all the notes for each thread.  When writing the first thread, the
1575 * process-wide notes are interleaved after the first thread-specific note.
1576 */
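/*
 * With two threads and an FPU regset, for example, the notes come out as:
 *
 *   NT_PRSTATUS (thread 1), NT_PRPSINFO, NT_AUXV, NT_PRFPREG (thread 1),
 *   NT_PRSTATUS (thread 2), NT_PRFPREG (thread 2)
 *
 * so a consumer that treats the first NT_PRSTATUS as the dumping thread
 * finds the process-wide data immediately after it.
 */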
1577static int write_note_info(struct elf_note_info *info,
1578                           struct file *file, loff_t *foffset)
1579{
1580        bool first = true;
1581        struct elf_thread_core_info *t = info->thread;
1582
1583        do {
1584                int i;
1585
1586                if (!writenote(&t->notes[0], file, foffset))
1587                        return 0;
1588
1589                if (first && !writenote(&info->psinfo, file, foffset))
1590                        return 0;
1591                if (first && !writenote(&info->auxv, file, foffset))
1592                        return 0;
1593
1594                for (i = 1; i < info->thread_notes; ++i)
1595                        if (t->notes[i].data &&
1596                            !writenote(&t->notes[i], file, foffset))
1597                                return 0;
1598
1599                first = 0;
1600                t = t->next;
1601        } while (t);
1602
1603        return 1;
1604}
1605
1606static void free_note_info(struct elf_note_info *info)
1607{
1608        struct elf_thread_core_info *threads = info->thread;
1609        while (threads) {
1610                unsigned int i;
1611                struct elf_thread_core_info *t = threads;
1612                threads = t->next;
1613                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1614                for (i = 1; i < info->thread_notes; ++i)
1615                        kfree(t->notes[i].data);
1616                kfree(t);
1617        }
1618        kfree(info->psinfo.data);
1619}
1620
1621#else
1622
1623/* Here is the structure in which the status of each thread is captured. */
1624struct elf_thread_status
1625{
1626        struct list_head list;
1627        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1628        elf_fpregset_t fpu;             /* NT_PRFPREG */
1629        struct task_struct *thread;
1630#ifdef ELF_CORE_COPY_XFPREGS
1631        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1632#endif
1633        struct memelfnote notes[3];
1634        int num_notes;
1635};
1636
1637/*
1638 * In order to add per-thread information to the ELF core file, we keep
1639 * a linked list of every thread's pr_status and then create a single
1640 * section for them in the final core file.
1641 */
1642static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1643{
1644        int sz = 0;
1645        struct task_struct *p = t->thread;
1646        t->num_notes = 0;
1647
1648        fill_prstatus(&t->prstatus, p, signr);
1649        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1650        
1651        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1652                  &(t->prstatus));
1653        t->num_notes++;
1654        sz += notesize(&t->notes[0]);
1655
1656        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657                                                                &t->fpu))) {
1658                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1659                          &(t->fpu));
1660                t->num_notes++;
1661                sz += notesize(&t->notes[1]);
1662        }
1663
1664#ifdef ELF_CORE_COPY_XFPREGS
1665        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1666                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1667                          sizeof(t->xfpu), &t->xfpu);
1668                t->num_notes++;
1669                sz += notesize(&t->notes[2]);
1670        }
1671#endif  
1672        return sz;
1673}
1674
1675struct elf_note_info {
1676        struct memelfnote *notes;
1677        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1678        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1679        struct list_head thread_list;
1680        elf_fpregset_t *fpu;
1681#ifdef ELF_CORE_COPY_XFPREGS
1682        elf_fpxregset_t *xfpu;
1683#endif
1684        int thread_status_size;
1685        int numnote;
1686};
1687
1688static int elf_note_info_init(struct elf_note_info *info)
1689{
1690        memset(info, 0, sizeof(*info));
1691        INIT_LIST_HEAD(&info->thread_list);
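        /*
         * The memset above leaves every pointer NULL, so if an allocation
         * below fails the caller can still unwind through free_note_info();
         * kfree(NULL) is a no-op.
         */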
1692
1693        /* Allocate space for six ELF notes */
1694        info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1695        if (!info->notes)
1696                return 0;
1697        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698        if (!info->psinfo)
1699                return 0;
1700        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701        if (!info->prstatus)
1702                return 0;
1703        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704        if (!info->fpu)
1705                return 0;
1706#ifdef ELF_CORE_COPY_XFPREGS
1707        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708        if (!info->xfpu)
1709                return 0;
1710#endif
1711        return 1;
1712}
1713
1714static int fill_note_info(struct elfhdr *elf, int phdrs,
1715                          struct elf_note_info *info,
1716                          long signr, struct pt_regs *regs)
1717{
1718        struct list_head *t;
1719
1720        if (!elf_note_info_init(info))
1721                return 0;
1722
1723        if (signr) {
1724                struct core_thread *ct;
1725                struct elf_thread_status *ets;
1726
1727                for (ct = current->mm->core_state->dumper.next;
1728                                                ct; ct = ct->next) {
1729                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1730                        if (!ets)
1731                                return 0;
1732
1733                        ets->thread = ct->task;
1734                        list_add(&ets->list, &info->thread_list);
1735                }
1736
1737                list_for_each(t, &info->thread_list) {
1738                        int sz;
1739
1740                        ets = list_entry(t, struct elf_thread_status, list);
1741                        sz = elf_dump_thread_status(signr, ets);
1742                        info->thread_status_size += sz;
1743                }
1744        }
1745        /* now collect the dump for the current task */
1746        memset(info->prstatus, 0, sizeof(*info->prstatus));
1747        fill_prstatus(info->prstatus, current, signr);
1748        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1749
1750        /* Set up header */
1751        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1752
1753        /*
1754         * Set up the notes in similar form to SVR4 core dumps made
1755         * with info from their /proc.
1756         */
1757
1758        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1759                  sizeof(*info->prstatus), info->prstatus);
1760        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1761        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1762                  sizeof(*info->psinfo), info->psinfo);
1763
1764        info->numnote = 2;
1765
1766        fill_auxv_note(&info->notes[info->numnote++], current->mm);
1767
1768        /* Try to dump the FPU. */
1769        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1770                                                               info->fpu);
1771        if (info->prstatus->pr_fpvalid)
1772                fill_note(info->notes + info->numnote++,
1773                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1774#ifdef ELF_CORE_COPY_XFPREGS
1775        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1776                fill_note(info->notes + info->numnote++,
1777                          "LINUX", ELF_CORE_XFPREG_TYPE,
1778                          sizeof(*info->xfpu), info->xfpu);
1779#endif
1780
1781        return 1;
1782}
1783
1784static size_t get_note_info_size(struct elf_note_info *info)
1785{
1786        int sz = 0;
1787        int i;
1788
1789        for (i = 0; i < info->numnote; i++)
1790                sz += notesize(info->notes + i);
1791
1792        sz += info->thread_status_size;
1793
1794        return sz;
1795}
1796
1797static int write_note_info(struct elf_note_info *info,
1798                           struct file *file, loff_t *foffset)
1799{
1800        int i;
1801        struct list_head *t;
1802
1803        for (i = 0; i < info->numnote; i++)
1804                if (!writenote(info->notes + i, file, foffset))
1805                        return 0;
1806
1807        /* write out the thread status notes section */
1808        list_for_each(t, &info->thread_list) {
1809                struct elf_thread_status *tmp =
1810                                list_entry(t, struct elf_thread_status, list);
1811
1812                for (i = 0; i < tmp->num_notes; i++)
1813                        if (!writenote(&tmp->notes[i], file, foffset))
1814                                return 0;
1815        }
1816
1817        return 1;
1818}
1819
1820static void free_note_info(struct elf_note_info *info)
1821{
1822        while (!list_empty(&info->thread_list)) {
1823                struct list_head *tmp = info->thread_list.next;
1824                list_del(tmp);
1825                kfree(list_entry(tmp, struct elf_thread_status, list));
1826        }
1827
1828        kfree(info->prstatus);
1829        kfree(info->psinfo);
1830        kfree(info->notes);
1831        kfree(info->fpu);
1832#ifdef ELF_CORE_COPY_XFPREGS
1833        kfree(info->xfpu);
1834#endif
1835}
1836
1837#endif
1838
1839static struct vm_area_struct *first_vma(struct task_struct *tsk,
1840                                        struct vm_area_struct *gate_vma)
1841{
1842        struct vm_area_struct *ret = tsk->mm->mmap;
1843
1844        if (ret)
1845                return ret;
1846        return gate_vma;
1847}
1848/*
1849 * Helper function for iterating across a vma list.  It ensures that the caller
1850 * will visit `gate_vma' prior to terminating the search.
1851 */
1852static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1853                                        struct vm_area_struct *gate_vma)
1854{
1855        struct vm_area_struct *ret;
1856
1857        ret = this_vma->vm_next;
1858        if (ret)
1859                return ret;
1860        if (this_vma == gate_vma)
1861                return NULL;
1862        return gate_vma;
1863}
1864
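/*
 * ELF extended numbering: when e_phnum is PN_XNUM, the real program header
 * count is carried in sh_info of section header 0.  Emit a single SHT_NULL
 * section header holding that count.
 */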
1865static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1866                             elf_addr_t e_shoff, int segs)
1867{
1868        elf->e_shoff = e_shoff;
1869        elf->e_shentsize = sizeof(*shdr4extnum);
1870        elf->e_shnum = 1;
1871        elf->e_shstrndx = SHN_UNDEF;
1872
1873        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1874
1875        shdr4extnum->sh_type = SHT_NULL;
1876        shdr4extnum->sh_size = elf->e_shnum;
1877        shdr4extnum->sh_link = elf->e_shstrndx;
1878        shdr4extnum->sh_info = segs;
1879}
1880
1881static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1882                                     unsigned long mm_flags)
1883{
1884        struct vm_area_struct *vma;
1885        size_t size = 0;
1886
1887        for (vma = first_vma(current, gate_vma); vma != NULL;
1888             vma = next_vma(vma, gate_vma))
1889                size += vma_dump_size(vma, mm_flags);
1890        return size;
1891}
1892
1893/*
1894 * Actual dumper
1895 *
1896 * This is a two-pass process; first we find the offsets of the bits,
1897 * and then they are actually written out.  If we run out of core limit
1898 * we just truncate.
1899 */
1900static int elf_core_dump(struct coredump_params *cprm)
1901{
1902        int has_dumped = 0;
1903        mm_segment_t fs;
1904        int segs;
1905        size_t size = 0;
1906        struct vm_area_struct *vma, *gate_vma;
1907        struct elfhdr *elf = NULL;
1908        loff_t offset = 0, dataoff, foffset;
1909        struct elf_note_info info = { };   /* zeroed so cleanup is safe on early failure */
1910        struct elf_phdr *phdr4note = NULL;
1911        struct elf_shdr *shdr4extnum = NULL;
1912        Elf_Half e_phnum;
1913        elf_addr_t e_shoff;
1914
1915        /*
1916         * We no longer stop all VM operations.
1917         *
1918         * This is because any process that could change map_count or the
1919         * mmap/vma pages is now blocked in do_exit() until current has
1920         * finished this core dump.
1921         *
1922         * Only ptrace can touch these memory addresses, and it doesn't
1923         * change the map_count or the pages allocated, so there is no risk
1924         * of crashing while dumping the mm->vm_next areas to the core file.
1925         */
1926  
1927        /* alloc memory for large data structures: too large to be on stack */
1928        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1929        if (!elf)
1930                goto out;
1931        /*
1932         * The number of segs is recorded in the ELF header as a 16-bit value.
1933         * Check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
1934         */
1935        segs = current->mm->map_count;
1936        segs += elf_core_extra_phdrs();
1937
1938        gate_vma = get_gate_vma(current->mm);
1939        if (gate_vma != NULL)
1940                segs++;
1941
1942        /* for notes section */
1943        segs++;
1944
1945        /* If segs > PN_XNUM (0xffff), e_phnum overflows.  To avoid this,
1946         * the kernel supports extended numbering.  See include/linux/elf.h
1947         * for further information. */
1948        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1949
1950        /*
1951         * Collect all the non-memory information about the process for the
1952         * notes.  This also sets up the file header.
1953         */
1954        if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1955                goto cleanup;
1956
1957        has_dumped = 1;
1958        current->flags |= PF_DUMPCORE;
1959  
1960        fs = get_fs();
1961        set_fs(KERNEL_DS);
1962
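        /*
         * First pass: lay out the file.
         *
         *   [ELF header][program headers][notes][page-aligned segment data]
         *   [extended-numbering section header, if e_phnum == PN_XNUM]
         *
         * foffset tracks where the notes will land; dataoff marks the start
         * of the memory data.
         */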
1963        offset += sizeof(*elf);                         /* Elf header */
1964        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1965        foffset = offset;
1966
1967        /* Write notes phdr entry */
1968        {
1969                size_t sz = get_note_info_size(&info);
1970
1971                sz += elf_coredump_extra_notes_size();
1972
1973                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1974                if (!phdr4note)
1975                        goto end_coredump;
1976
1977                fill_elf_note_phdr(phdr4note, sz, offset);
1978                offset += sz;
1979        }
1980
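        /*
         * Page-align the memory data so each PT_LOAD's p_offset stays
         * congruent with its p_vaddr modulo the page size.
         */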
1981        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1982
1983        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1984        offset += elf_core_extra_data_size();
1985        e_shoff = offset;
1986
1987        if (e_phnum == PN_XNUM) {
1988                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1989                if (!shdr4extnum)
1990                        goto end_coredump;
1991                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1992        }
1993
1994        offset = dataoff;
1995
1996        size += sizeof(*elf);
1997        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1998                goto end_coredump;
1999
2000        size += sizeof(*phdr4note);
2001        if (size > cprm->limit
2002            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2003                goto end_coredump;
2004
2005        /* Write program headers for segments dump */
2006        for (vma = first_vma(current, gate_vma); vma != NULL;
2007                        vma = next_vma(vma, gate_vma)) {
2008                struct elf_phdr phdr;
2009
2010                phdr.p_type = PT_LOAD;
2011                phdr.p_offset = offset;
2012                phdr.p_vaddr = vma->vm_start;
2013                phdr.p_paddr = 0;
2014                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2015                phdr.p_memsz = vma->vm_end - vma->vm_start;
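                /*
                 * p_filesz may be smaller than p_memsz when vma_dump_size()
                 * filtered this vma; ELF semantics make readers treat the
                 * missing tail as zeroes.
                 */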
2016                offset += phdr.p_filesz;
2017                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2018                if (vma->vm_flags & VM_WRITE)
2019                        phdr.p_flags |= PF_W;
2020                if (vma->vm_flags & VM_EXEC)
2021                        phdr.p_flags |= PF_X;
2022                phdr.p_align = ELF_EXEC_PAGESIZE;
2023
2024                size += sizeof(phdr);
2025                if (size > cprm->limit
2026                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2027                        goto end_coredump;
2028        }
2029
2030        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2031                goto end_coredump;
2032
2033        /* write out the notes section */
2034        if (!write_note_info(&info, cprm->file, &foffset))
2035                goto end_coredump;
2036
2037        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2038                goto end_coredump;
2039
2040        /* Align to page */
2041        if (!dump_seek(cprm->file, dataoff - foffset))
2042                goto end_coredump;
2043
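        /*
         * Second pass: write the memory contents.  Pages that
         * get_dump_page() cannot (or need not) fetch are skipped with
         * dump_seek(), which leaves a hole that reads back as zeroes.
         */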
2044        for (vma = first_vma(current, gate_vma); vma != NULL;
2045                        vma = next_vma(vma, gate_vma)) {
2046                unsigned long addr;
2047                unsigned long end;
2048
2049                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2050
2051                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2052                        struct page *page;
2053                        int stop;
2054
2055                        page = get_dump_page(addr);
2056                        if (page) {
2057                                void *kaddr = kmap(page);
2058                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2059                                        !dump_write(cprm->file, kaddr,
2060                                                    PAGE_SIZE);
2061                                kunmap(page);
2062                                page_cache_release(page);
2063                        } else
2064                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2065                        if (stop)
2066                                goto end_coredump;
2067                }
2068        }
2069
2070        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2071                goto end_coredump;
2072
2073        if (e_phnum == PN_XNUM) {
2074                size += sizeof(*shdr4extnum);
2075                if (size > cprm->limit
2076                    || !dump_write(cprm->file, shdr4extnum,
2077                                   sizeof(*shdr4extnum)))
2078                        goto end_coredump;
2079        }
2080
2081end_coredump:
2082        set_fs(fs);
2083
2084cleanup:
2085        free_note_info(&info);
2086        kfree(shdr4extnum);
2087        kfree(phdr4note);
2088        kfree(elf);
2089out:
2090        return has_dumped;
2091}
2092
2093#endif          /* CONFIG_ELF_CORE */
2094
2095static int __init init_elf_binfmt(void)
2096{
2097        register_binfmt(&elf_format);
2098        return 0;
2099}
2100
2101static void __exit exit_elf_binfmt(void)
2102{
2103        /* Remove the ELF loader. */
2104        unregister_binfmt(&elf_format);
2105}
2106
2107core_initcall(init_elf_binfmt);
2108module_exit(exit_elf_binfmt);
2109MODULE_LICENSE("GPL");
2110