/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
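/*
 * For example, with ELF_MIN_ALIGN == 0x1000:
 *   ELF_PAGESTART(0x12345)  == 0x12000
 *   ELF_PAGEOFFSET(0x12345) == 0x345
 *   ELF_PAGEALIGN(0x12345)  == 0x13000
 */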

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
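/*
 * Note that BAD_ADDR() also catches negative error codes returned through
 * an unsigned long (e.g. from vm_mmap() or vm_brk()), since those wrap to
 * values well above TASK_SIZE.
 */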

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
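/*
 * e.g. set_brk(0x601234, 0x603000), with ELF_MIN_ALIGN == 0x1000, maps
 * anonymous zero pages over [0x602000, 0x603000) and leaves
 * start_brk == brk == 0x603000 (addresses purely illustrative).
 */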

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
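/*
 * On a grows-down stack, STACK_ALLOC(sp, len) moves sp down by len bytes
 * and yields the new (lower) address for the caller to copy into; on a
 * grows-up stack it returns the old sp and bumps sp past the allocation.
 */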

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

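/*
 * create_elf_tables() below builds the initial process stack.  On a
 * grows-down stack the result, from the final stack pointer upward, is
 * roughly:
 *
 *   argc
 *   argv[0] .. argv[argc-1], NULL
 *   envp[0] .. envp[envc-1], NULL
 *   auxv (a_type, a_val) pairs, terminated by an AT_NULL entry
 *   random bytes, platform strings, arg/env strings near the stack top
 */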
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we map the full image first and unmap the remainder at
         * the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}
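/*
 * e.g. with ELF_MIN_ALIGN == 0x1000, a segment with p_vaddr 0x400123,
 * p_filesz 0x100 and p_offset 0x123 (illustrative values) is mapped
 * page-aligned: addr = 0x400000, size = ELF_PAGEALIGN(0x223) = 0x1000,
 * off = 0.
 */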

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
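/*
 * e.g. PT_LOAD segments at p_vaddr 0x400000 (first) and p_vaddr 0x600000
 * with p_memsz 0x800 (last) give 0x600800 - 0x400000 = 0x200800, the span
 * the whole image will occupy once mapped (values illustrative).
 */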

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
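/*
 * With 4K pages the default STACK_RND_MASK allows up to 0x7ff pages of
 * offset, i.e. just under 8MB of stack-top randomization below (or above,
 * on grows-up stacks) the aligned stack top.
 */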

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure the path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections, so
                                         * we don't check the return value.
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment: part, all, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
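/*
 * e.g. for a "CORE" note: sizeof(struct elf_note) (typically 12 bytes)
 * plus roundup(5, 4) == 8 for the name "CORE\0", plus the descriptor
 * size rounded up to a multiple of 4.
 */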

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * the registers, which need to be filled in separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
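        /* i now counts every auxv word, including the terminating
           AT_NULL pair, so the whole vector lands in the note. */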
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static void fill_files_note(struct memelfnote *note)
{
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
        user_long_t *data;
        user_long_t *start_end_ofs;
        char *name_base, *name_curpos;

        /* *Estimated* file count and total data size needed */
        count = current->mm->map_count;
        size = count * 64;

        names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
                goto err;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
                goto err;

        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
        remaining = size - names_ofs;
        count = 0;
        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
                struct file *file;
                const char *filename;

                file = vma->vm_file;
                if (!file)
                        continue;
                filename = d_path(&file->f_path, name_curpos, remaining);
                if (IS_ERR(filename)) {
                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
                                vfree(data);
                                size = size * 5 / 4;
                                goto alloc;
                        }
                        continue;
                }

                /* d_path() fills at the end, move name down */
                /* n = strlen(filename) + 1: */
                n = (name_curpos + remaining) - filename;
                remaining = filename - name_curpos;
                memmove(name_curpos, filename, n);
                name_curpos += n;

                *start_end_ofs++ = vma->vm_start;
                *start_end_ofs++ = vma->vm_end;
                *start_end_ofs++ = vma->vm_pgoff;
                count++;
        }

1460        /* Now we know the exact count of files, so we can store it */
1461        data[0] = count;
1462        data[1] = PAGE_SIZE;
1463        /*
1464         * The count is usually less than current->mm->map_count;
1465         * then the filenames must be moved down over the unused triples.
1466         */
1467        n = current->mm->map_count - count;
1468        if (n != 0) {
1469                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1470                memmove(name_base - shift_bytes, name_base,
1471                        name_curpos - name_base);
1472                name_curpos -= shift_bytes;
1473        }
1474
1475        size = name_curpos - (char *)data;
1476        fill_note(note, "CORE", NT_FILE, size, data);
1477 err: ;
1478}
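
/*
 * Illustrative sketch, not part of this file: a userspace reader for
 * the NT_FILE payload described above.  It assumes <stdio.h> and
 * <string.h> and a reader built for the same ABI as the dump, so that
 * plain long matches user_long_t; parse_nt_file() is a hypothetical
 * name.
 */
#if 0
static void parse_nt_file(const char *payload)
{
        const unsigned long *data = (const unsigned long *)payload;
        unsigned long count = data[0];
        unsigned long page_size = data[1];
        const unsigned long *ent = data + 2;
        /* filenames start right after the count start/end/ofs triples */
        const char *name = (const char *)(ent + 3 * count);
        unsigned long i;

        for (i = 0; i < count; i++, ent += 3) {
                printf("%lx-%lx @ %lx: %s\n", ent[0], ent[1],
                       ent[2] * page_size, name);
                name += strlen(name) + 1;
        }
}
#endif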
1479
1480#ifdef CORE_DUMP_USE_REGSET
1481#include <linux/regset.h>
1482
1483struct elf_thread_core_info {
1484        struct elf_thread_core_info *next;
1485        struct task_struct *task;
1486        struct elf_prstatus prstatus;
1487        struct memelfnote notes[0];
1488};
1489
1490struct elf_note_info {
1491        struct elf_thread_core_info *thread;
1492        struct memelfnote psinfo;
1493        struct memelfnote signote;
1494        struct memelfnote auxv;
1495        struct memelfnote files;
1496        user_siginfo_t csigdata;
1497        size_t size;
1498        int thread_notes;
1499};
1500
1501/*
1502 * When a regset has a writeback hook, we call it on each thread before
1503 * dumping user memory.  On register window machines, this makes sure the
1504 * user memory backing the register data is up to date before we read it.
1505 */
1506static void do_thread_regset_writeback(struct task_struct *task,
1507                                       const struct user_regset *regset)
1508{
1509        if (regset->writeback)
1510                regset->writeback(task, regset, 1);
1511}
1512
1513#ifndef PR_REG_SIZE
1514#define PR_REG_SIZE(S) sizeof(S)
1515#endif
1516
1517#ifndef PRSTATUS_SIZE
1518#define PRSTATUS_SIZE(S) sizeof(S)
1519#endif
1520
1521#ifndef PR_REG_PTR
1522#define PR_REG_PTR(S) (&((S)->pr_reg))
1523#endif
1524
1525#ifndef SET_PR_FPVALID
1526#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1527#endif
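
/*
 * The four macros above are override points: an architecture can
 * predefine them when the prstatus layout it exposes to userspace
 * differs from the generic structure (compat dumps, for example).
 */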
1528
1529static int fill_thread_core_info(struct elf_thread_core_info *t,
1530                                 const struct user_regset_view *view,
1531                                 long signr, size_t *total)
1532{
1533        unsigned int i;
1534
1535        /*
1536         * NT_PRSTATUS is the one special case, because the regset data
1537         * goes into the pr_reg field inside the note contents, rather
1538         * than being the whole note contents.  We fill the rest in here.
1539         * We assume that regset 0 is NT_PRSTATUS.
1540         */
1541        fill_prstatus(&t->prstatus, t->task, signr);
1542        (void) view->regsets[0].get(t->task, &view->regsets[0],
1543                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1544                                    PR_REG_PTR(&t->prstatus), NULL);
1545
1546        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1547                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1548        *total += notesize(&t->notes[0]);
1549
1550        do_thread_regset_writeback(t->task, &view->regsets[0]);
1551
1552        /*
1553         * Every other regset might generate a note too.  For each regset
1554         * that has no core_note_type or is inactive, we leave t->notes[i]
1555         * all zero and we'll know to skip writing it later.
1556         */
1557        for (i = 1; i < view->n; ++i) {
1558                const struct user_regset *regset = &view->regsets[i];
1559                do_thread_regset_writeback(t->task, regset);
1560                if (regset->core_note_type && regset->get &&
1561                    (!regset->active || regset->active(t->task, regset))) {
1562                        int ret;
1563                        size_t size = regset->n * regset->size;
1564                        void *data = kmalloc(size, GFP_KERNEL);
1565                        if (unlikely(!data))
1566                                return 0;
1567                        ret = regset->get(t->task, regset,
1568                                          0, size, data, NULL);
1569                        if (unlikely(ret))
1570                                kfree(data);
1571                        else {
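                                /*
                                 * FPU state traditionally lives in a
                                 * "CORE" note and is advertised via
                                 * pr_fpvalid; other regsets get
                                 * "LINUX" notes.
                                 */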
1572                                if (regset->core_note_type != NT_PRFPREG)
1573                                        fill_note(&t->notes[i], "LINUX",
1574                                                  regset->core_note_type,
1575                                                  size, data);
1576                                else {
1577                                        SET_PR_FPVALID(&t->prstatus, 1);
1578                                        fill_note(&t->notes[i], "CORE",
1579                                                  NT_PRFPREG, size, data);
1580                                }
1581                                *total += notesize(&t->notes[i]);
1582                        }
1583                }
1584        }
1585
1586        return 1;
1587}
1588
1589static int fill_note_info(struct elfhdr *elf, int phdrs,
1590                          struct elf_note_info *info,
1591                          siginfo_t *siginfo, struct pt_regs *regs)
1592{
1593        struct task_struct *dump_task = current;
1594        const struct user_regset_view *view = task_user_regset_view(dump_task);
1595        struct elf_thread_core_info *t;
1596        struct elf_prpsinfo *psinfo;
1597        struct core_thread *ct;
1598        unsigned int i;
1599
1600        info->size = 0;
1601        info->thread = NULL;
1602
1603        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1604        if (psinfo == NULL) {
1605                info->psinfo.data = NULL; /* So we don't free this wrongly */
1606                return 0;
1607        }
1608
1609        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1610
1611        /*
1612         * Figure out how many notes we're going to need for each thread.
1613         */
1614        info->thread_notes = 0;
1615        for (i = 0; i < view->n; ++i)
1616                if (view->regsets[i].core_note_type != 0)
1617                        ++info->thread_notes;
1618
1619        /*
1620         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1621         * since it is our one special case.
1622         */
1623        if (unlikely(info->thread_notes == 0) ||
1624            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1625                WARN_ON(1);
1626                return 0;
1627        }
1628
1629        /*
1630         * Initialize the ELF file header.
1631         */
1632        fill_elf_header(elf, phdrs,
1633                        view->e_machine, view->e_flags, view->ei_osabi);
1634
1635        /*
1636         * Allocate a structure for each thread.
1637         */
1638        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1639                t = kzalloc(offsetof(struct elf_thread_core_info,
1640                                     notes[info->thread_notes]),
1641                            GFP_KERNEL);
1642                if (unlikely(!t))
1643                        return 0;
1644
1645                t->task = ct->task;
1646                if (ct->task == dump_task || !info->thread) {
1647                        t->next = info->thread;
1648                        info->thread = t;
1649                } else {
1650                        /*
1651                         * Make sure to keep the original task at
1652                         * the head of the list.
1653                         */
1654                        t->next = info->thread->next;
1655                        info->thread->next = t;
1656                }
1657        }
1658
1659        /*
1660         * Now fill in each thread's information.
1661         */
1662        for (t = info->thread; t != NULL; t = t->next)
1663                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1664                        return 0;
1665
1666        /*
1667         * Fill in the two process-wide notes.
1668         */
1669        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1670        info->size += notesize(&info->psinfo);
1671
1672        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1673        info->size += notesize(&info->signote);
1674
1675        fill_auxv_note(&info->auxv, current->mm);
1676        info->size += notesize(&info->auxv);
1677
1678        fill_files_note(&info->files);
1679        info->size += notesize(&info->files);
1680
1681        return 1;
1682}
1683
1684static size_t get_note_info_size(struct elf_note_info *info)
1685{
1686        return info->size;
1687}
1688
1689/*
1690 * Write all the notes for each thread.  When writing the first thread, the
1691 * process-wide notes are interleaved after the first thread-specific note.
1692 */
1693static int write_note_info(struct elf_note_info *info,
1694                           struct file *file, loff_t *foffset)
1695{
1696        bool first = true;
1697        struct elf_thread_core_info *t = info->thread;
1698
1699        do {
1700                int i;
1701
1702                if (!writenote(&t->notes[0], file, foffset))
1703                        return 0;
1704
1705                if (first && !writenote(&info->psinfo, file, foffset))
1706                        return 0;
1707                if (first && !writenote(&info->signote, file, foffset))
1708                        return 0;
1709                if (first && !writenote(&info->auxv, file, foffset))
1710                        return 0;
1711                if (first && !writenote(&info->files, file, foffset))
1712                        return 0;
1713
1714                for (i = 1; i < info->thread_notes; ++i)
1715                        if (t->notes[i].data &&
1716                            !writenote(&t->notes[i], file, foffset))
1717                                return 0;
1718
1719                first = 0;
1720                t = t->next;
1721        } while (t);
1722
1723        return 1;
1724}
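
/*
 * The resulting PT_NOTE segment is therefore laid out as:
 *
 *   first thread:  NT_PRSTATUS,
 *                  NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *                  remaining regset notes
 *   other threads: NT_PRSTATUS, remaining regset notes
 */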
1725
1726static void free_note_info(struct elf_note_info *info)
1727{
1728        struct elf_thread_core_info *threads = info->thread;
1729        while (threads) {
1730                unsigned int i;
1731                struct elf_thread_core_info *t = threads;
1732                threads = t->next;
1733                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1734                for (i = 1; i < info->thread_notes; ++i)
1735                        kfree(t->notes[i].data);
1736                kfree(t);
1737        }
1738        kfree(info->psinfo.data);
1739        vfree(info->files.data);
1740}
1741
1742#else
1743
1744/* Here is the structure in which the status of each thread is captured. */
1745struct elf_thread_status
1746{
1747        struct list_head list;
1748        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1749        elf_fpregset_t fpu;             /* NT_PRFPREG */
1750        struct task_struct *thread;
1751#ifdef ELF_CORE_COPY_XFPREGS
1752        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1753#endif
1754        struct memelfnote notes[3];
1755        int num_notes;
1756};
1757
1758/*
1759 * To add the per-thread information to the ELF core file, we keep a
1760 * linked list of each thread's pr_status and then create a single
1761 * note section for them all in the final file.
1762 */
1763static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1764{
1765        int sz = 0;
1766        struct task_struct *p = t->thread;
1767        t->num_notes = 0;
1768
1769        fill_prstatus(&t->prstatus, p, signr);
1770        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1771        
1772        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1773                  &(t->prstatus));
1774        t->num_notes++;
1775        sz += notesize(&t->notes[0]);
1776
1777        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1778                                                                &t->fpu))) {
1779                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1780                          &(t->fpu));
1781                t->num_notes++;
1782                sz += notesize(&t->notes[1]);
1783        }
1784
1785#ifdef ELF_CORE_COPY_XFPREGS
1786        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1787                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1788                          sizeof(t->xfpu), &t->xfpu);
1789                t->num_notes++;
1790                sz += notesize(&t->notes[2]);
1791        }
1792#endif  
1793        return sz;
1794}
1795
1796struct elf_note_info {
1797        struct memelfnote *notes;
1798        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1799        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1800        struct list_head thread_list;
1801        elf_fpregset_t *fpu;
1802#ifdef ELF_CORE_COPY_XFPREGS
1803        elf_fpxregset_t *xfpu;
1804#endif
1805        user_siginfo_t csigdata;
1806        int thread_status_size;
1807        int numnote;
1808};
1809
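/*
 * Allocate the pieces of elf_note_info.  Partial failure is fine:
 * the caller unwinds through free_note_info(), which is why the
 * structure is zeroed up front.
 */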
1810static int elf_note_info_init(struct elf_note_info *info)
1811{
1812        memset(info, 0, sizeof(*info));
1813        INIT_LIST_HEAD(&info->thread_list);
1814
1815        /* Allocate space for ELF notes */
1816        info->notes = kzalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1817        if (!info->notes)
1818                return 0;
1819        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1820        if (!info->psinfo)
1821                return 0;
1822        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1823        if (!info->prstatus)
1824                return 0;
1825        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1826        if (!info->fpu)
1827                return 0;
1828#ifdef ELF_CORE_COPY_XFPREGS
1829        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1830        if (!info->xfpu)
1831                return 0;
1832#endif
1833        return 1;
1834}
1835
1836static int fill_note_info(struct elfhdr *elf, int phdrs,
1837                          struct elf_note_info *info,
1838                          siginfo_t *siginfo, struct pt_regs *regs)
1839{
1840        struct list_head *t;
1841
1842        if (!elf_note_info_init(info))
1843                return 0;
1844
1845        if (siginfo->si_signo) {
1846                struct core_thread *ct;
1847                struct elf_thread_status *ets;
1848
1849                for (ct = current->mm->core_state->dumper.next;
1850                                                ct; ct = ct->next) {
1851                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1852                        if (!ets)
1853                                return 0;
1854
1855                        ets->thread = ct->task;
1856                        list_add(&ets->list, &info->thread_list);
1857                }
1858
1859                list_for_each(t, &info->thread_list) {
1860                        int sz;
1861
1862                        ets = list_entry(t, struct elf_thread_status, list);
1863                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1864                        info->thread_status_size += sz;
1865                }
1866        }
1867        /* now collect the dump for the current task */
1868        memset(info->prstatus, 0, sizeof(*info->prstatus));
1869        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1870        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871
1872        /* Set up header */
1873        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1874
1875        /*
1876         * Set up the notes in similar form to SVR4 core dumps made
1877         * with info from their /proc.
1878         */
1879
1880        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1881                  sizeof(*info->prstatus), info->prstatus);
1882        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1883        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1884                  sizeof(*info->psinfo), info->psinfo);
1885
1886        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1887        fill_auxv_note(info->notes + 3, current->mm);
1888        fill_files_note(info->notes + 4);
1889
1890        info->numnote = 5;
1891
1892        /* Try to dump the FPU. */
1893        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1894                                                               info->fpu);
1895        if (info->prstatus->pr_fpvalid)
1896                fill_note(info->notes + info->numnote++,
1897                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1898#ifdef ELF_CORE_COPY_XFPREGS
1899        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1900                fill_note(info->notes + info->numnote++,
1901                          "LINUX", ELF_CORE_XFPREG_TYPE,
1902                          sizeof(*info->xfpu), info->xfpu);
1903#endif
1904
1905        return 1;
1906}
1907
1908static size_t get_note_info_size(struct elf_note_info *info)
1909{
1910        int sz = 0;
1911        int i;
1912
1913        for (i = 0; i < info->numnote; i++)
1914                sz += notesize(info->notes + i);
1915
1916        sz += info->thread_status_size;
1917
1918        return sz;
1919}
1920
1921static int write_note_info(struct elf_note_info *info,
1922                           struct file *file, loff_t *foffset)
1923{
1924        int i;
1925        struct list_head *t;
1926
1927        for (i = 0; i < info->numnote; i++)
1928                if (!writenote(info->notes + i, file, foffset))
1929                        return 0;
1930
1931        /* write out the thread status notes section */
1932        list_for_each(t, &info->thread_list) {
1933                struct elf_thread_status *tmp =
1934                                list_entry(t, struct elf_thread_status, list);
1935
1936                for (i = 0; i < tmp->num_notes; i++)
1937                        if (!writenote(&tmp->notes[i], file, foffset))
1938                                return 0;
1939        }
1940
1941        return 1;
1942}
1943
1944static void free_note_info(struct elf_note_info *info)
1945{
1946        while (!list_empty(&info->thread_list)) {
1947                struct list_head *tmp = info->thread_list.next;
1948                list_del(tmp);
1949                kfree(list_entry(tmp, struct elf_thread_status, list));
1950        }
1951
1952        /* Free data allocated by fill_files_note(): */
1953        if (info->notes)
1954                vfree(info->notes[4].data);
1955        kfree(info->prstatus);
1956        kfree(info->psinfo);
1957        kfree(info->notes);
1958        kfree(info->fpu);
1959#ifdef ELF_CORE_COPY_XFPREGS
1960        kfree(info->xfpu);
1961#endif
1962}
1963
1964#endif
1965
1966static struct vm_area_struct *first_vma(struct task_struct *tsk,
1967                                        struct vm_area_struct *gate_vma)
1968{
1969        struct vm_area_struct *ret = tsk->mm->mmap;
1970
1971        if (ret)
1972                return ret;
1973        return gate_vma;
1974}
1975/*
1976 * Helper function for iterating across a vma list.  It ensures that the caller
1977 * will visit `gate_vma' prior to terminating the search.
1978 */
1979static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1980                                        struct vm_area_struct *gate_vma)
1981{
1982        struct vm_area_struct *ret;
1983
1984        ret = this_vma->vm_next;
1985        if (ret)
1986                return ret;
1987        if (this_vma == gate_vma)
1988                return NULL;
1989        return gate_vma;
1990}
1991
1992static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1993                             elf_addr_t e_shoff, int segs)
1994{
1995        elf->e_shoff = e_shoff;
1996        elf->e_shentsize = sizeof(*shdr4extnum);
1997        elf->e_shnum = 1;
1998        elf->e_shstrndx = SHN_UNDEF;
1999
2000        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2001
2002        shdr4extnum->sh_type = SHT_NULL;
2003        shdr4extnum->sh_size = elf->e_shnum;
2004        shdr4extnum->sh_link = elf->e_shstrndx;
2005        shdr4extnum->sh_info = segs;
2006}
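
/*
 * Reader side of the extended-numbering scheme (illustrative sketch,
 * not part of this file): when e_phnum is PN_XNUM the real program
 * header count is recovered from sh_info of section header 0.
 */
#if 0
static size_t real_phnum(const struct elfhdr *eh, const struct elf_shdr *sh0)
{
        return eh->e_phnum == PN_XNUM ? sh0->sh_info : eh->e_phnum;
}
#endif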
2007
2008static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2009                                     unsigned long mm_flags)
2010{
2011        struct vm_area_struct *vma;
2012        size_t size = 0;
2013
2014        for (vma = first_vma(current, gate_vma); vma != NULL;
2015             vma = next_vma(vma, gate_vma))
2016                size += vma_dump_size(vma, mm_flags);
2017        return size;
2018}
2019
2020/*
2021 * Actual dumper
2022 *
2023 * This is a two-pass process; first we find the offsets of all the
2024 * pieces, and then they are actually written out.  If we run over the
2025 * core limit we just truncate.
2026 */
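/*
 * Layout of the resulting file (illustrative):
 *
 *   ELF header
 *   program headers: one PT_NOTE, then one PT_LOAD per vma
 *     (plus any arch-specific extra phdrs)
 *   note data
 *   padding up to an ELF_EXEC_PAGESIZE boundary
 *   memory contents of each vma, in program header order
 *   arch-specific extra data, if any
 *   one placeholder section header, only when e_phnum == PN_XNUM
 */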
2027static int elf_core_dump(struct coredump_params *cprm)
2028{
2029        int has_dumped = 0;
2030        mm_segment_t fs;
2031        int segs;
2032        size_t size = 0;
2033        struct vm_area_struct *vma, *gate_vma;
2034        struct elfhdr *elf = NULL;
2035        loff_t offset = 0, dataoff, foffset;
2036        struct elf_note_info info;
2037        struct elf_phdr *phdr4note = NULL;
2038        struct elf_shdr *shdr4extnum = NULL;
2039        Elf_Half e_phnum;
2040        elf_addr_t e_shoff;
2041
2042        /*
2043         * We no longer stop all VM operations.
2044         * 
2045         * This is because those processes that could possibly change the
2046         * map_count or the mmap/vma pages are now blocked in do_exit until
2047         * current finishes this core dump.
2048         *
2049         * Only ptrace can touch these memory addresses, but it doesn't change
2050         * the map_count or the pages allocated. So no possibility of crashing
2051         * exists while dumping the mm->vm_next areas to the core file.
2052         */
2053  
2054        /* alloc memory for large data structures: too large to be on stack */
2055        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2056        if (!elf)
2057                goto out;
2058        /*
2059         * The number of segs is recorded in the ELF header as a 16-bit
2060         * value; check the DEFAULT_MAX_MAP_COUNT definition when changing this.
2061         */
2062        segs = current->mm->map_count;
2063        segs += elf_core_extra_phdrs();
2064
2065        gate_vma = get_gate_vma(current->mm);
2066        if (gate_vma != NULL)
2067                segs++;
2068
2069        /* for notes section */
2070        segs++;
2071
2072        /* If segs > PN_XNUM (0xffff), then e_phnum overflows.  To avoid
2073         * this, the kernel supports extended numbering.  Have a look at
2074         * include/linux/elf.h for further information. */
2075        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2076
2077        /*
2078         * Collect all the non-memory information about the process for the
2079         * notes.  This also sets up the file header.
2080         */
2081        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2082                goto cleanup;
2083
2084        has_dumped = 1;
2085        current->flags |= PF_DUMPCORE;
2086  
2087        fs = get_fs();
2088        set_fs(KERNEL_DS);
2089
2090        offset += sizeof(*elf);                         /* Elf header */
2091        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
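        /* from here on, foffset tracks how much has actually been written */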
2092        foffset = offset;
2093
2094        /* Write notes phdr entry */
2095        {
2096                size_t sz = get_note_info_size(&info);
2097
2098                sz += elf_coredump_extra_notes_size();
2099
2100                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2101                if (!phdr4note)
2102                        goto end_coredump;
2103
2104                fill_elf_note_phdr(phdr4note, sz, offset);
2105                offset += sz;
2106        }
2107
2108        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2109
2110        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2111        offset += elf_core_extra_data_size();
2112        e_shoff = offset;
2113
2114        if (e_phnum == PN_XNUM) {
2115                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2116                if (!shdr4extnum)
2117                        goto end_coredump;
2118                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2119        }
2120
2121        offset = dataoff;
2122
2123        size += sizeof(*elf);
2124        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2125                goto end_coredump;
2126
2127        size += sizeof(*phdr4note);
2128        if (size > cprm->limit
2129            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2130                goto end_coredump;
2131
2132        /* Write program headers for segments dump */
2133        for (vma = first_vma(current, gate_vma); vma != NULL;
2134                        vma = next_vma(vma, gate_vma)) {
2135                struct elf_phdr phdr;
2136
2137                phdr.p_type = PT_LOAD;
2138                phdr.p_offset = offset;
2139                phdr.p_vaddr = vma->vm_start;
2140                phdr.p_paddr = 0;
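                /* p_filesz may be smaller than p_memsz when the vma is filtered */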
2141                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2142                phdr.p_memsz = vma->vm_end - vma->vm_start;
2143                offset += phdr.p_filesz;
2144                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2145                if (vma->vm_flags & VM_WRITE)
2146                        phdr.p_flags |= PF_W;
2147                if (vma->vm_flags & VM_EXEC)
2148                        phdr.p_flags |= PF_X;
2149                phdr.p_align = ELF_EXEC_PAGESIZE;
2150
2151                size += sizeof(phdr);
2152                if (size > cprm->limit
2153                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2154                        goto end_coredump;
2155        }
2156
2157        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2158                goto end_coredump;
2159
2160        /* write out the notes section */
2161        if (!write_note_info(&info, cprm->file, &foffset))
2162                goto end_coredump;
2163
2164        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2165                goto end_coredump;
2166
2167        /* Align to page */
2168        if (!dump_seek(cprm->file, dataoff - foffset))
2169                goto end_coredump;
2170
2171        for (vma = first_vma(current, gate_vma); vma != NULL;
2172                        vma = next_vma(vma, gate_vma)) {
2173                unsigned long addr;
2174                unsigned long end;
2175
2176                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2177
2178                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2179                        struct page *page;
2180                        int stop;
2181
2182                        page = get_dump_page(addr);
2183                        if (page) {
2184                                void *kaddr = kmap(page);
2185                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2186                                        !dump_write(cprm->file, kaddr,
2187                                                    PAGE_SIZE);
2188                                kunmap(page);
2189                                page_cache_release(page);
2190                        } else
2191                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2192                        if (stop)
2193                                goto end_coredump;
2194                }
2195        }
2196
2197        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2198                goto end_coredump;
2199
2200        if (e_phnum == PN_XNUM) {
2201                size += sizeof(*shdr4extnum);
2202                if (size > cprm->limit
2203                    || !dump_write(cprm->file, shdr4extnum,
2204                                   sizeof(*shdr4extnum)))
2205                        goto end_coredump;
2206        }
2207
2208end_coredump:
2209        set_fs(fs);
2210
2211cleanup:
2212        free_note_info(&info);
2213        kfree(shdr4extnum);
2214        kfree(phdr4note);
2215        kfree(elf);
2216out:
2217        return has_dumped;
2218}
2219
2220#endif          /* CONFIG_ELF_CORE */
2221
2222static int __init init_elf_binfmt(void)
2223{
2224        register_binfmt(&elf_format);
2225        return 0;
2226}
2227
2228static void __exit exit_elf_binfmt(void)
2229{
2230        /* Remove the ELF loader. */
2231        unregister_binfmt(&elf_format);
2232}
2233
2234core_initcall(init_elf_binfmt);
2235module_exit(exit_elf_binfmt);
2236MODULE_LICENSE("GPL");
2237