/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
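/*
 * Worked example (assuming ELF_MIN_ALIGN == 0x1000, i.e. 4 KiB pages):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000   round down to the page start
 *   ELF_PAGEOFFSET(0x12345) == 0x00345   offset within the page
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   round up to the next boundary
 *
 * A value that is already aligned is left unchanged by ELF_PAGESTART and
 * ELF_PAGEALIGN, and has an ELF_PAGEOFFSET of zero.
 */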

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
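/*
 * Illustration (assuming ELF_MIN_ALIGN == 0x1000): if the file-backed
 * part of a segment ends at elf_bss == 0x40a234, padzero() zeroes the
 * 0xdcc bytes from 0x40a234 up to the page boundary at 0x40b000, so the
 * tail of the last mapped page does not leak file contents into the bss.
 */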

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
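/*
 * Sketch of the grows-down case: STACK_ALLOC(sp, len) moves sp down by
 * len bytes and evaluates to the new, lower address; STACK_ADD(sp, items)
 * reserves 'items' elf_addr_t slots below sp; and STACK_ROUND(sp, items)
 * computes the sp that leaves room for 'items' slots, rounded down to a
 * 16-byte boundary as most ABIs require.  For example,
 * STACK_ROUND(0x7fff1009, 0) yields 0x7fff1000.
 */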

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
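/*
 * Each NEW_AUX_ENT() appends one (a_type, a_val) pair, so the auxiliary
 * vector built below is a flat array of elf_addr_t values, e.g.:
 *
 *   { AT_HWCAP, hwcap, AT_PAGESZ, 4096, ..., AT_NULL, 0 }
 *
 * Userspace (typically the dynamic linker) walks it two words at a time
 * until it sees AT_NULL.
 */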

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif
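
        /*
         * From sp upward, the initial stack now being assembled looks
         * like this (grows-down case, lowest address first):
         *
         *   argc
         *   argv[0] ... argv[argc-1], NULL
         *   envp[0] ... envp[envc-1], NULL
         *   auxv pairs, terminated by AT_NULL
         *   (higher up: the argument/environment strings, the platform
         *    strings and the AT_RANDOM bytes copied in above)
         */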

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}
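/*
 * Example of the total_size path above: for an interpreter whose PT_LOAD
 * segments span 0x21000 bytes in total but whose first segment has only
 * 0x3000 bytes of file data, the first call maps the full 0x21000 bytes
 * at a (possibly randomized) base and then munmaps everything past the
 * first 0x3000 bytes.  The caller places later segments with MAP_FIXED
 * relative to that base, so holes in the image cannot be filled by
 * unrelated mappings in the meantime.
 */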

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
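/*
 * E.g. with two PT_LOAD segments, the first at p_vaddr 0x400 and the
 * last at p_vaddr 0x20000 with p_memsz 0x1500, this returns
 * 0x20000 + 0x1500 - ELF_PAGESTART(0x400) = 0x21500: the span from the
 * page containing the lowest PT_LOAD to the end of the highest one.
 * This relies on the ELF spec's requirement that PT_LOAD entries be
 * sorted by ascending p_vaddr.
 */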

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
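/*
 * With the default STACK_RND_MASK and 4 KiB pages (PAGE_SHIFT == 12),
 * random_variable is a page-size multiple in [0, 0x7ff << 12], so the
 * stack top is shifted by up to 8 MiB minus one page below (or above,
 * for grows-up stacks) its nominal PAGE_ALIGN'd position.
 */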

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }
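
                /*
                 * Sketch: for a PIE binary on an arch without
                 * CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE, e.g. if
                 * ELF_ET_DYN_BASE were 0x400000 and the first PT_LOAD
                 * had p_vaddr 0, load_bias would become 0x400000 and
                 * every segment would be mapped at load_bias + p_vaddr.
                 * With the randomize-PIE option and PF_RANDOMIZE set,
                 * load_bias stays 0 and the kernel's usual mmap
                 * randomization chooses the base instead.
                 */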

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization of
         * the regs structure is required, as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
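/*
 * FILTER(type) tests the corresponding coredump filter bit in mm_flags;
 * e.g. FILTER(ANON_PRIVATE) checks bit MMF_DUMP_ANON_PRIVATE, which
 * userspace controls through /proc/<pid>/coredump_filter.
 */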

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
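/*
 * Worked example: a note named "CORE" (5 bytes including the NUL)
 * carrying a 6-byte payload occupies sizeof(struct elf_note) (12 bytes
 * on both 32- and 64-bit: three 4-byte words for namesz, descsz, type)
 * + roundup(5, 4) + roundup(6, 4) = 12 + 8 + 8 = 28 bytes, matching
 * what writenote() below emits including its alignment padding.
 */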

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;

        /* count (id, val) pairs up to and including the AT_NULL terminator */
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
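/*
 * For instance, a process with two file-backed mappings might produce:
 *   count = 2, page_size = 4096,
 *   { start0, end0, ofs0 }, { start1, end1, ofs1 },
 *   "/lib/ld.so\0/usr/bin/prog\0"
 * where each ofs is the file offset expressed in page_size units.
 */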
1404static void fill_files_note(struct memelfnote *note)
1405{
1406        struct vm_area_struct *vma;
1407        unsigned count, size, names_ofs, remaining, n;
1408        user_long_t *data;
1409        user_long_t *start_end_ofs;
1410        char *name_base, *name_curpos;
1411
1412        /* *Estimated* file count and total data size needed */
1413        count = current->mm->map_count;
1414        size = count * 64;
1415
1416        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1417 alloc:
1418        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1419                return -EINVAL;
1420        size = round_up(size, PAGE_SIZE);
1421        data = vmalloc(size);
1422        if (!data)
1423                return -ENOMEM;
1424
1425        start_end_ofs = data + 2;
1426        name_base = name_curpos = ((char *)data) + names_ofs;
1427        remaining = size - names_ofs;
1428        count = 0;
1429        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1430                struct file *file;
1431                const char *filename;
1432
1433                file = vma->vm_file;
1434                if (!file)
1435                        continue;
1436                filename = d_path(&file->f_path, name_curpos, remaining);
1437                if (IS_ERR(filename)) {
1438                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1439                                vfree(data);
1440                                size = size * 5 / 4;
1441                                goto alloc;
1442                        }
1443                        continue;
1444                }
1445
1446                /* d_path() fills at the end, move name down */
1447                /* n = strlen(filename) + 1: */
1448                n = (name_curpos + remaining) - filename;
1449                remaining = filename - name_curpos;
1450                memmove(name_curpos, filename, n);
1451                name_curpos += n;
1452
1453                *start_end_ofs++ = vma->vm_start;
1454                *start_end_ofs++ = vma->vm_end;
1455                *start_end_ofs++ = vma->vm_pgoff;
1456                count++;
1457        }
1458
1459        /* Now we know the exact count of files, so we can store it */
1460        data[0] = count;
1461        data[1] = PAGE_SIZE;
1462        /*
1463         * The count is usually less than current->mm->map_count;
1464         * if so, move the filenames down over the unused slots.
1465         */
1466        n = current->mm->map_count - count;
1467        if (n != 0) {
1468                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1469                memmove(name_base - shift_bytes, name_base,
1470                        name_curpos - name_base);
1471                name_curpos -= shift_bytes;
1472        }
1473
1474        size = name_curpos - (char *)data;
1475        fill_note(note, "CORE", NT_FILE, size, data);
1476        return 0;
1477}
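
/*
 * A sketch of how a consumer (a debugger, say) might walk the NT_FILE
 * payload once the note descriptor is mapped at 'desc' -- illustrative
 * userspace-style code, not kernel code:
 *
 *	long *p = desc;
 *	long count = *p++;
 *	long page_size = *p++;
 *	char *name = (char *)(p + 3 * count);
 *	for (long i = 0; i < count; i++) {
 *		printf("%#lx-%#lx at byte offset %ld: %s\n", p[3 * i],
 *		       p[3 * i + 1], p[3 * i + 2] * page_size, name);
 *		name += strlen(name) + 1;
 *	}
 */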
1478
1479#ifdef CORE_DUMP_USE_REGSET
1480#include <linux/regset.h>
1481
1482struct elf_thread_core_info {
1483        struct elf_thread_core_info *next;
1484        struct task_struct *task;
1485        struct elf_prstatus prstatus;
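            /* zero-length trailing array; allocated as notes[thread_notes] */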
1486        struct memelfnote notes[0];
1487};
1488
1489struct elf_note_info {
1490        struct elf_thread_core_info *thread;
1491        struct memelfnote psinfo;
1492        struct memelfnote signote;
1493        struct memelfnote auxv;
1494        struct memelfnote files;
1495        user_siginfo_t csigdata;
1496        size_t size;
1497        int thread_notes;
1498};
1499
1500/*
1501 * When a regset has a writeback hook, we call it on each thread before
1502 * dumping user memory.  On register window machines, this makes sure the
1503 * user memory backing the register data is up to date before we read it.
1504 */
1505static void do_thread_regset_writeback(struct task_struct *task,
1506                                       const struct user_regset *regset)
1507{
1508        if (regset->writeback)
1509                regset->writeback(task, regset, 1);
1510}
1511
1512#ifndef PR_REG_SIZE
1513#define PR_REG_SIZE(S) sizeof(S)
1514#endif
1515
1516#ifndef PRSTATUS_SIZE
1517#define PRSTATUS_SIZE(S) sizeof(S)
1518#endif
1519
1520#ifndef PR_REG_PTR
1521#define PR_REG_PTR(S) (&((S)->pr_reg))
1522#endif
1523
1524#ifndef SET_PR_FPVALID
1525#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1526#endif
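
/*
 * The four macros above are override points: an architecture where the
 * dumped task's prstatus layout may differ from the native one (e.g. a
 * compat task on a 64-bit kernel) can supply its own definitions.
 */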
1527
1528static int fill_thread_core_info(struct elf_thread_core_info *t,
1529                                 const struct user_regset_view *view,
1530                                 long signr, size_t *total)
1531{
1532        unsigned int i;
1533
1534        /*
1535         * NT_PRSTATUS is the one special case, because the regset data
1536         * goes into the pr_reg field inside the note contents, rather
1537         * than being the whole note contents.  We fill the rest in here.
1538         * We assume that regset 0 is NT_PRSTATUS.
1539         */
1540        fill_prstatus(&t->prstatus, t->task, signr);
1541        (void) view->regsets[0].get(t->task, &view->regsets[0],
1542                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1543                                    PR_REG_PTR(&t->prstatus), NULL);
1544
1545        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1546                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1547        *total += notesize(&t->notes[0]);
1548
1549        do_thread_regset_writeback(t->task, &view->regsets[0]);
1550
1551        /*
1552         * Each other regset might generate a note too.  For each regset
1553         * that has no core_note_type or is inactive, we leave t->notes[i]
1554         * all zero and we'll know to skip writing it later.
1555         */
1556        for (i = 1; i < view->n; ++i) {
1557                const struct user_regset *regset = &view->regsets[i];
1558                do_thread_regset_writeback(t->task, regset);
1559                if (regset->core_note_type && regset->get &&
1560                    (!regset->active || regset->active(t->task, regset))) {
1561                        int ret;
1562                        size_t size = regset->n * regset->size;
1563                        void *data = kmalloc(size, GFP_KERNEL);
1564                        if (unlikely(!data))
1565                                return 0;
1566                        ret = regset->get(t->task, regset,
1567                                          0, size, data, NULL);
1568                        if (unlikely(ret))
1569                                kfree(data);
1570                        else {
1571                                if (regset->core_note_type != NT_PRFPREG)
1572                                        fill_note(&t->notes[i], "LINUX",
1573                                                  regset->core_note_type,
1574                                                  size, data);
1575                                else {
1576                                        SET_PR_FPVALID(&t->prstatus, 1);
1577                                        fill_note(&t->notes[i], "CORE",
1578                                                  NT_PRFPREG, size, data);
1579                                }
1580                                *total += notesize(&t->notes[i]);
1581                        }
1582                }
1583        }
1584
1585        return 1;
1586}
1587
1588static int fill_note_info(struct elfhdr *elf, int phdrs,
1589                          struct elf_note_info *info,
1590                          siginfo_t *siginfo, struct pt_regs *regs)
1591{
1592        struct task_struct *dump_task = current;
1593        const struct user_regset_view *view = task_user_regset_view(dump_task);
1594        struct elf_thread_core_info *t;
1595        struct elf_prpsinfo *psinfo;
1596        struct core_thread *ct;
1597        unsigned int i;
1598
1599        info->size = 0;
1600        info->thread = NULL;
1601
1602        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1603        if (psinfo == NULL)
1604                return 0;
1605
1606        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1607
1608        /*
1609         * Figure out how many notes we're going to need for each thread.
1610         */
1611        info->thread_notes = 0;
1612        for (i = 0; i < view->n; ++i)
1613                if (view->regsets[i].core_note_type != 0)
1614                        ++info->thread_notes;
1615
1616        /*
1617         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1618         * since it is our one special case.
1619         */
1620        if (unlikely(info->thread_notes == 0) ||
1621            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1622                WARN_ON(1);
1623                return 0;
1624        }
1625
1626        /*
1627         * Initialize the ELF file header.
1628         */
1629        fill_elf_header(elf, phdrs,
1630                        view->e_machine, view->e_flags, view->ei_osabi);
1631
1632        /*
1633         * Allocate a structure for each thread.
1634         */
1635        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1636                t = kzalloc(offsetof(struct elf_thread_core_info,
1637                                     notes[info->thread_notes]),
1638                            GFP_KERNEL);
1639                if (unlikely(!t))
1640                        return 0;
1641
1642                t->task = ct->task;
1643                if (ct->task == dump_task || !info->thread) {
1644                        t->next = info->thread;
1645                        info->thread = t;
1646                } else {
1647                        /*
1648                         * Make sure to keep the original task at
1649                         * the head of the list.
1650                         */
1651                        t->next = info->thread->next;
1652                        info->thread->next = t;
1653                }
1654        }
1655
1656        /*
1657         * Now fill in each thread's information.
1658         */
1659        for (t = info->thread; t != NULL; t = t->next)
1660                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1661                        return 0;
1662
1663        /*
1664         * Fill in the two process-wide notes.
1665         */
1666        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1667        info->size += notesize(&info->psinfo);
1668
1669        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1670        info->size += notesize(&info->signote);
1671
1672        fill_auxv_note(&info->auxv, current->mm);
1673        info->size += notesize(&info->auxv);
1674
1675        if (fill_files_note(&info->files) == 0)
1676                info->size += notesize(&info->files);
1677
1678        return 1;
1679}
1680
1681static size_t get_note_info_size(struct elf_note_info *info)
1682{
1683        return info->size;
1684}
1685
1686/*
1687 * Write all the notes for each thread.  When writing the first thread, the
1688 * process-wide notes are interleaved after the first thread-specific note.
1689 */
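/*
 * For example, with threads A (the dumping thread) and B, the order is:
 * PRSTATUS(A), PRPSINFO, SIGINFO, AUXV, FILE, A's other regset notes,
 * then PRSTATUS(B) followed by B's other regset notes.
 */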
1690static int write_note_info(struct elf_note_info *info,
1691                           struct file *file, loff_t *foffset)
1692{
1693        bool first = true;
1694        struct elf_thread_core_info *t = info->thread;
1695
1696        do {
1697                int i;
1698
1699                if (!writenote(&t->notes[0], file, foffset))
1700                        return 0;
1701
1702                if (first && !writenote(&info->psinfo, file, foffset))
1703                        return 0;
1704                if (first && !writenote(&info->signote, file, foffset))
1705                        return 0;
1706                if (first && !writenote(&info->auxv, file, foffset))
1707                        return 0;
1708                if (first && info->files.data &&
                        !writenote(&info->files, file, foffset))
1709                        return 0;
1710
1711                for (i = 1; i < info->thread_notes; ++i)
1712                        if (t->notes[i].data &&
1713                            !writenote(&t->notes[i], file, foffset))
1714                                return 0;
1715
1716                first = false;
1717                t = t->next;
1718        } while (t);
1719
1720        return 1;
1721}
1722
1723static void free_note_info(struct elf_note_info *info)
1724{
1725        struct elf_thread_core_info *threads = info->thread;
1726        while (threads) {
1727                unsigned int i;
1728                struct elf_thread_core_info *t = threads;
1729                threads = t->next;
1730                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1731                for (i = 1; i < info->thread_notes; ++i)
1732                        kfree(t->notes[i].data);
1733                kfree(t);
1734        }
1735        kfree(info->psinfo.data);
1736        vfree(info->files.data);
1737}
1738
1739#else
1740
1741/* Here is the structure in which the status of each thread is captured. */
1742struct elf_thread_status
1743{
1744        struct list_head list;
1745        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1746        elf_fpregset_t fpu;             /* NT_PRFPREG */
1747        struct task_struct *thread;
1748#ifdef ELF_CORE_COPY_XFPREGS
1749        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1750#endif
1751        struct memelfnote notes[3];
1752        int num_notes;
1753};
1754
1755/*
1756 * In order to add the specific thread information for the elf file format,
1757 * we need to keep a linked list of every thread's pr_status and then create
1758 * a single section for them in the final core file.
1759 */
1760static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1761{
1762        int sz = 0;
1763        struct task_struct *p = t->thread;
1764        t->num_notes = 0;
1765
1766        fill_prstatus(&t->prstatus, p, signr);
1767        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1768        
1769        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1770                  &(t->prstatus));
1771        t->num_notes++;
1772        sz += notesize(&t->notes[0]);
1773
1774        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1775                                                                &t->fpu))) {
1776                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1777                          &(t->fpu));
1778                t->num_notes++;
1779                sz += notesize(&t->notes[1]);
1780        }
1781
1782#ifdef ELF_CORE_COPY_XFPREGS
1783        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1784                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1785                          sizeof(t->xfpu), &t->xfpu);
1786                t->num_notes++;
1787                sz += notesize(&t->notes[2]);
1788        }
1789#endif  
1790        return sz;
1791}
1792
1793struct elf_note_info {
1794        struct memelfnote *notes;
            struct memelfnote *notes_files; /* NT_FILE note, if filled */
1795        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1796        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1797        struct list_head thread_list;
1798        elf_fpregset_t *fpu;
1799#ifdef ELF_CORE_COPY_XFPREGS
1800        elf_fpxregset_t *xfpu;
1801#endif
1802        user_siginfo_t csigdata;
1803        int thread_status_size;
1804        int numnote;
1805};
1806
1807static int elf_note_info_init(struct elf_note_info *info)
1808{
1809        memset(info, 0, sizeof(*info));
1810        INIT_LIST_HEAD(&info->thread_list);
1811
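            /*
             * Partial failure is fine here: the caller bails out through
             * free_note_info(), and freeing the still-NULL members is a
             * no-op.
             */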
1812        /* Allocate space for ELF notes */
1813        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1814        if (!info->notes)
1815                return 0;
1816        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1817        if (!info->psinfo)
1818                return 0;
1819        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1820        if (!info->prstatus)
1821                return 0;
1822        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1823        if (!info->fpu)
1824                return 0;
1825#ifdef ELF_CORE_COPY_XFPREGS
1826        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1827        if (!info->xfpu)
1828                return 0;
1829#endif
1830        return 1;
1831}
1832
1833static int fill_note_info(struct elfhdr *elf, int phdrs,
1834                          struct elf_note_info *info,
1835                          siginfo_t *siginfo, struct pt_regs *regs)
1836{
1837        struct list_head *t;
1838
1839        if (!elf_note_info_init(info))
1840                return 0;
1841
1842        if (siginfo->si_signo) {
1843                struct core_thread *ct;
1844                struct elf_thread_status *ets;
1845
1846                for (ct = current->mm->core_state->dumper.next;
1847                                                ct; ct = ct->next) {
1848                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1849                        if (!ets)
1850                                return 0;
1851
1852                        ets->thread = ct->task;
1853                        list_add(&ets->list, &info->thread_list);
1854                }
1855
1856                list_for_each(t, &info->thread_list) {
1857                        int sz;
1858
1859                        ets = list_entry(t, struct elf_thread_status, list);
1860                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1861                        info->thread_status_size += sz;
1862                }
1863        }
1864        /* now collect the dump for the current task */
1865        memset(info->prstatus, 0, sizeof(*info->prstatus));
1866        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1867        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1868
1869        /* Set up header */
1870        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1871
1872        /*
1873         * Set up the notes in similar form to SVR4 core dumps made
1874         * with info from their /proc.
1875         */
1876
1877        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1878                  sizeof(*info->prstatus), info->prstatus);
1879        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1880        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1881                  sizeof(*info->psinfo), info->psinfo);
1882
1883        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1884        fill_auxv_note(info->notes + 3, current->mm);
1885        info->numnote = 4;
1886
1887        if (fill_files_note(info->notes + info->numnote) == 0) {
                    info->notes_files = info->notes + info->numnote;
                    info->numnote++;
            }
1888
1889        /* Try to dump the FPU. */
1890        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1891                                                               info->fpu);
1892        if (info->prstatus->pr_fpvalid)
1893                fill_note(info->notes + info->numnote++,
1894                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1895#ifdef ELF_CORE_COPY_XFPREGS
1896        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1897                fill_note(info->notes + info->numnote++,
1898                          "LINUX", ELF_CORE_XFPREG_TYPE,
1899                          sizeof(*info->xfpu), info->xfpu);
1900#endif
1901
1902        return 1;
1903}
1904
1905static size_t get_note_info_size(struct elf_note_info *info)
1906{
1907        int sz = 0;
1908        int i;
1909
1910        for (i = 0; i < info->numnote; i++)
1911                sz += notesize(info->notes + i);
1912
1913        sz += info->thread_status_size;
1914
1915        return sz;
1916}
1917
1918static int write_note_info(struct elf_note_info *info,
1919                           struct file *file, loff_t *foffset)
1920{
1921        int i;
1922        struct list_head *t;
1923
1924        for (i = 0; i < info->numnote; i++)
1925                if (!writenote(info->notes + i, file, foffset))
1926                        return 0;
1927
1928        /* write out the thread status notes section */
1929        list_for_each(t, &info->thread_list) {
1930                struct elf_thread_status *tmp =
1931                                list_entry(t, struct elf_thread_status, list);
1932
1933                for (i = 0; i < tmp->num_notes; i++)
1934                        if (!writenote(&tmp->notes[i], file, foffset))
1935                                return 0;
1936        }
1937
1938        return 1;
1939}
1940
1941static void free_note_info(struct elf_note_info *info)
1942{
1943        while (!list_empty(&info->thread_list)) {
1944                struct list_head *tmp = info->thread_list.next;
1945                list_del(tmp);
1946                kfree(list_entry(tmp, struct elf_thread_status, list));
1947        }
1948
1949        /* Free data possibly allocated by fill_files_note(): */
1950        if (info->notes_files)
                    vfree(info->notes_files->data);
1951
1952        kfree(info->prstatus);
1953        kfree(info->psinfo);
1954        kfree(info->notes);
1955        kfree(info->fpu);
1956#ifdef ELF_CORE_COPY_XFPREGS
1957        kfree(info->xfpu);
1958#endif
1959}
1960
1961#endif
1962
1963static struct vm_area_struct *first_vma(struct task_struct *tsk,
1964                                        struct vm_area_struct *gate_vma)
1965{
1966        struct vm_area_struct *ret = tsk->mm->mmap;
1967
1968        if (ret)
1969                return ret;
1970        return gate_vma;
1971}
1972/*
1973 * Helper function for iterating across a vma list.  It ensures that the caller
1974 * will visit `gate_vma' prior to terminating the search.
1975 */
1976static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1977                                        struct vm_area_struct *gate_vma)
1978{
1979        struct vm_area_struct *ret;
1980
1981        ret = this_vma->vm_next;
1982        if (ret)
1983                return ret;
1984        if (this_vma == gate_vma)
1985                return NULL;
1986        return gate_vma;
1987}
1988
1989static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1990                             elf_addr_t e_shoff, int segs)
1991{
1992        elf->e_shoff = e_shoff;
1993        elf->e_shentsize = sizeof(*shdr4extnum);
1994        elf->e_shnum = 1;
1995        elf->e_shstrndx = SHN_UNDEF;
1996
1997        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1998
1999        shdr4extnum->sh_type = SHT_NULL;
2000        shdr4extnum->sh_size = elf->e_shnum;
2001        shdr4extnum->sh_link = elf->e_shstrndx;
2002        shdr4extnum->sh_info = segs;
2003}
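
/*
 * Extended numbering, illustrated: a dump with, say, 70000 segments
 * stores PN_XNUM (0xffff) in e_phnum and the true count, 70000, in
 * sh_info of this extra section header; readers must check e_phnum
 * against PN_XNUM before trusting it.
 */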
2004
2005static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2006                                     unsigned long mm_flags)
2007{
2008        struct vm_area_struct *vma;
2009        size_t size = 0;
2010
2011        for (vma = first_vma(current, gate_vma); vma != NULL;
2012             vma = next_vma(vma, gate_vma))
2013                size += vma_dump_size(vma, mm_flags);
2014        return size;
2015}
2016
2017/*
2018 * Actual dumper
2019 *
2020 * This is a two-pass process; first we find the offsets of the bits,
2021 * and then they are actually written out.  If we exceed the core
2022 * file size limit, we just truncate.
2023 */
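/*
 * Rough layout of the resulting file:
 *
 *	[ELF header][program headers: one PT_NOTE, one PT_LOAD per vma,
 *	 plus any arch extras]
 *	[note data][padding up to ELF_EXEC_PAGESIZE]
 *	[vma contents][arch extra data][extnum section header, if any]
 */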
2024static int elf_core_dump(struct coredump_params *cprm)
2025{
2026        int has_dumped = 0;
2027        mm_segment_t fs;
2028        int segs;
2029        size_t size = 0;
2030        struct vm_area_struct *vma, *gate_vma;
2031        struct elfhdr *elf = NULL;
2032        loff_t offset = 0, dataoff, foffset;
2033        struct elf_note_info info = { };
2034        struct elf_phdr *phdr4note = NULL;
2035        struct elf_shdr *shdr4extnum = NULL;
2036        Elf_Half e_phnum;
2037        elf_addr_t e_shoff;
2038
2039        /*
2040         * We no longer stop all VM operations.
2041         * 
2042         * This is because those processes that could possibly change map_count
2043         * or the mmap / vma pages are now blocked in do_exit on current
2044         * finishing this core dump.
2045         *
2046         * Only ptrace can touch these memory addresses, but it doesn't change
2047         * the map_count or the pages allocated. So no possibility of crashing
2048         * exists while dumping the mm->vm_next areas to the core file.
2049         */
2050  
2051        /* alloc memory for large data structures: too large to be on stack */
2052        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2053        if (!elf)
2054                goto out;
2055        /*
2056         * The number of segs is recorded in the ELF header as a 16-bit value.
2057         * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2058         */
2059        segs = current->mm->map_count;
2060        segs += elf_core_extra_phdrs();
2061
2062        gate_vma = get_gate_vma(current->mm);
2063        if (gate_vma != NULL)
2064                segs++;
2065
2066        /* for notes section */
2067        segs++;
2068
2069        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2070         * this, the kernel supports extended numbering. Have a look at
2071         * include/linux/elf.h for further information. */
2072        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2073
2074        /*
2075         * Collect all the non-memory information about the process for the
2076         * notes.  This also sets up the file header.
2077         */
2078        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2079                goto cleanup;
2080
2081        has_dumped = 1;
2082        current->flags |= PF_DUMPCORE;
2083  
2084        fs = get_fs();
2085        set_fs(KERNEL_DS);
2086
2087        offset += sizeof(*elf);                         /* Elf header */
2088        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2089        foffset = offset;
2090
2091        /* Write notes phdr entry */
2092        {
2093                size_t sz = get_note_info_size(&info);
2094
2095                sz += elf_coredump_extra_notes_size();
2096
2097                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2098                if (!phdr4note)
2099                        goto end_coredump;
2100
2101                fill_elf_note_phdr(phdr4note, sz, offset);
2102                offset += sz;
2103        }
2104
2105        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
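            /*
             * Bookkeeping example (illustrative, 64-bit dump, 4 KiB pages,
             * three program headers): the ELF header occupies bytes 0-63,
             * the program headers bytes 64-231, the notes start at
             * foffset == 232, and dataoff rounds the post-note offset up
             * to the next 4096-byte boundary.
             */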
2106
2107        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2108        offset += elf_core_extra_data_size();
2109        e_shoff = offset;
2110
2111        if (e_phnum == PN_XNUM) {
2112                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2113                if (!shdr4extnum)
2114                        goto end_coredump;
2115                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2116        }
2117
2118        offset = dataoff;
2119
2120        size += sizeof(*elf);
2121        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2122                goto end_coredump;
2123
2124        size += sizeof(*phdr4note);
2125        if (size > cprm->limit
2126            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2127                goto end_coredump;
2128
2129        /* Write program headers for segments dump */
2130        for (vma = first_vma(current, gate_vma); vma != NULL;
2131                        vma = next_vma(vma, gate_vma)) {
2132                struct elf_phdr phdr;
2133
2134                phdr.p_type = PT_LOAD;
2135                phdr.p_offset = offset;
2136                phdr.p_vaddr = vma->vm_start;
2137                phdr.p_paddr = 0;
2138                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2139                phdr.p_memsz = vma->vm_end - vma->vm_start;
2140                offset += phdr.p_filesz;
2141                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2142                if (vma->vm_flags & VM_WRITE)
2143                        phdr.p_flags |= PF_W;
2144                if (vma->vm_flags & VM_EXEC)
2145                        phdr.p_flags |= PF_X;
2146                phdr.p_align = ELF_EXEC_PAGESIZE;
2147
2148                size += sizeof(phdr);
2149                if (size > cprm->limit
2150                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2151                        goto end_coredump;
2152        }
2153
2154        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2155                goto end_coredump;
2156
2157        /* write out the notes section */
2158        if (!write_note_info(&info, cprm->file, &foffset))
2159                goto end_coredump;
2160
2161        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2162                goto end_coredump;
2163
2164        /* Align to page */
2165        if (!dump_seek(cprm->file, dataoff - foffset))
2166                goto end_coredump;
2167
2168        for (vma = first_vma(current, gate_vma); vma != NULL;
2169                        vma = next_vma(vma, gate_vma)) {
2170                unsigned long addr;
2171                unsigned long end;
2172
2173                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2174
2175                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2176                        struct page *page;
2177                        int stop;
2178
2179                        page = get_dump_page(addr);
2180                        if (page) {
2181                                void *kaddr = kmap(page);
2182                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2183                                        !dump_write(cprm->file, kaddr,
2184                                                    PAGE_SIZE);
2185                                kunmap(page);
2186                                page_cache_release(page);
2187                        } else
2188                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2189                        if (stop)
2190                                goto end_coredump;
2191                }
2192        }
2193
2194        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2195                goto end_coredump;
2196
2197        if (e_phnum == PN_XNUM) {
2198                size += sizeof(*shdr4extnum);
2199                if (size > cprm->limit
2200                    || !dump_write(cprm->file, shdr4extnum,
2201                                   sizeof(*shdr4extnum)))
2202                        goto end_coredump;
2203        }
2204
2205end_coredump:
2206        set_fs(fs);
2207
2208cleanup:
2209        free_note_info(&info);
2210        kfree(shdr4extnum);
2211        kfree(phdr4note);
2212        kfree(elf);
2213out:
2214        return has_dumped;
2215}
2216
2217#endif          /* CONFIG_ELF_CORE */
2218
2219static int __init init_elf_binfmt(void)
2220{
2221        register_binfmt(&elf_format);
2222        return 0;
2223}
2224
2225static void __exit exit_elf_binfmt(void)
2226{
2227        /* Remove the ELF loader. */
2228        unregister_binfmt(&elf_format);
2229}
2230
2231core_initcall(init_elf_binfmt);
2232module_exit(exit_elf_binfmt);
2233MODULE_LICENSE("GPL");
2234