/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

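/*
 * Map anonymous pages for the brk/bss region and record the new brk in
 * the mm.  Both endpoints are rounded up to ELF page granularity before
 * the vm_brk() call, so a region that rounds to zero length just updates
 * start_brk/brk.
 */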
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

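/*
 * Lay out the initial process stack for the new image: argc, the argv and
 * envp pointer arrays, and the ELF auxiliary vector (AT_PHDR, AT_ENTRY,
 * AT_RANDOM, ...) that the dynamic linker and C library consume.  The
 * auxv is built in mm->saved_auxv first, so it can later be reported via
 * /proc/<pid>/auxv and dumped in core files, then copied to the stack.
 */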
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

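/*
 * Map one PT_LOAD segment of @filep at @addr.  The request is widened to
 * ELF page granularity: the in-page offset of p_vaddr is added back to
 * both the size and the file offset, so the mapping stays congruent with
 * the file.  A non-zero @total_size (used for the first mapping of an
 * ET_DYN image) maps the whole image span in one go, so that address
 * space randomization cannot drop later segments on top of something
 * else; the excess beyond this segment is then unmapped again.
 */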
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

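/*
 * Address-space span of an ELF image: from the start of the page holding
 * the first PT_LOAD segment to the end (p_vaddr + p_memsz) of the last
 * one.  Returns 0 if there are no PT_LOAD headers at all.
 */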
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it is only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

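/*
 * Shift the stack top by a random, page-aligned amount bounded by
 * STACK_RND_MASK (down, or up on CONFIG_STACK_GROWSUP), unless
 * randomization is disabled for this process.
 */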
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

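/*
 * The main ELF loader: validate the headers, map the PT_LOAD segments
 * (and the PT_INTERP interpreter image, if there is one), set up the
 * bss/brk, build the stack via create_elf_tables(), and finally start
 * the new thread at the ELF (or interpreter) entry point.
 */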
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump.  That way we ensure that the core dump is fully interpretable
 * later without matching up the same kernel and hardware config to see
 * what PC values meant.  These special mappings include the vDSO, vsyscall,
 * and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: part, all, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

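/* On-disk size of a note: header plus 4-byte-aligned name and descriptor. */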
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while (0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

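/*
 * Emit one note to the dump file: the elf_note header, then the name and
 * the descriptor data, each padded out to 4-byte alignment.
 */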
static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

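/* Fill in the fixed fields of an ET_CORE ELF header with @segs program headers. */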
static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

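/*
 * Fill in the prpsinfo note: the command line (read back from the user
 * stack, with NULs turned into spaces), the ids, run state, nice value
 * and comm of the dumped task.
 */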
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

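/*
 * The auxv saved in mm->saved_auxv at exec time is dumped verbatim, up to
 * and including the terminating AT_NULL entry.
 */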
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

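/*
 * Convert the kernel siginfo to the user-visible layout (using a
 * temporary KERNEL_DS segment so copy_siginfo_to_user() may write to a
 * kernel buffer) and wrap it in an NT_SIGINFO note.
 */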
1392static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1393                siginfo_t *siginfo)
1394{
1395        mm_segment_t old_fs = get_fs();
1396        set_fs(KERNEL_DS);
1397        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1398        set_fs(old_fs);
1399        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1400}
1401
1402#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1403/*
1404 * Format of NT_FILE note:
1405 *
1406 * long count     -- how many files are mapped
1407 * long page_size -- units for file_ofs
1408 * array of [COUNT] elements of
1409 *   long start
1410 *   long end
1411 *   long file_ofs
1412 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1413 */
1414static void fill_files_note(struct memelfnote *note)
1415{
1416        struct vm_area_struct *vma;
1417        unsigned count, size, names_ofs, remaining, n;
1418        user_long_t *data;
1419        user_long_t *start_end_ofs;
1420        char *name_base, *name_curpos;
1421
1422        /* *Estimated* file count and total data size needed */
1423        count = current->mm->map_count;
1424        size = count * 64;
1425
1426        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1427 alloc:
1428        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1429                return -EINVAL;
1430        size = round_up(size, PAGE_SIZE);
1431        data = vmalloc(size);
1432        if (!data)
1433                return -ENOMEM;
1434
1435        start_end_ofs = data + 2;
1436        name_base = name_curpos = ((char *)data) + names_ofs;
1437        remaining = size - names_ofs;
1438        count = 0;
1439        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1440                struct file *file;
1441                const char *filename;
1442
1443                file = vma->vm_file;
1444                if (!file)
1445                        continue;
1446                filename = d_path(&file->f_path, name_curpos, remaining);
1447                if (IS_ERR(filename)) {
1448                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1449                                vfree(data);
1450                                size = size * 5 / 4;
1451                                goto alloc;
1452                        }
1453                        continue;
1454                }
1455
1456                /* d_path() fills at the end, move name down */
1457                /* n = strlen(filename) + 1: */
1458                n = (name_curpos + remaining) - filename;
1459                remaining = filename - name_curpos;
1460                memmove(name_curpos, filename, n);
1461                name_curpos += n;
1462
1463                *start_end_ofs++ = vma->vm_start;
1464                *start_end_ofs++ = vma->vm_end;
1465                *start_end_ofs++ = vma->vm_pgoff;
1466                count++;
1467        }
1468
1469        /* Now we know the exact number of files, so we can store it */
1470        data[0] = count;
1471        data[1] = PAGE_SIZE;
1472        /*
1473         * The final count is usually less than current->mm->map_count,
1474         * in which case the filenames need to be moved down.
1475         */
1476        n = current->mm->map_count - count;
1477        if (n != 0) {
1478                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1479                memmove(name_base - shift_bytes, name_base,
1480                        name_curpos - name_base);
1481                name_curpos -= shift_bytes;
1482        }
1483
1484        size = name_curpos - (char *)data;
1485        fill_note(note, "CORE", NT_FILE, size, data);
1486        return 0;
1487}
1488
1489#ifdef CORE_DUMP_USE_REGSET
1490#include <linux/regset.h>
1491
1492struct elf_thread_core_info {
1493        struct elf_thread_core_info *next;
1494        struct task_struct *task;
1495        struct elf_prstatus prstatus;
1496        struct memelfnote notes[0];
1497};
1498
1499struct elf_note_info {
1500        struct elf_thread_core_info *thread;
1501        struct memelfnote psinfo;
1502        struct memelfnote signote;
1503        struct memelfnote auxv;
1504        struct memelfnote files;
1505        user_siginfo_t csigdata;
1506        size_t size;
1507        int thread_notes;
1508};
1509
1510/*
1511 * When a regset has a writeback hook, we call it on each thread before
1512 * dumping user memory.  On register window machines, this makes sure the
1513 * user memory backing the register data is up to date before we read it.
1514 */
1515static void do_thread_regset_writeback(struct task_struct *task,
1516                                       const struct user_regset *regset)
1517{
1518        if (regset->writeback)
1519                regset->writeback(task, regset, 1);
1520}
1521
1522#ifndef PR_REG_SIZE
1523#define PR_REG_SIZE(S) sizeof(S)
1524#endif
1525
1526#ifndef PRSTATUS_SIZE
1527#define PRSTATUS_SIZE(S) sizeof(S)
1528#endif
1529
1530#ifndef PR_REG_PTR
1531#define PR_REG_PTR(S) (&((S)->pr_reg))
1532#endif
1533
1534#ifndef SET_PR_FPVALID
1535#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1536#endif
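    /*
     * The four macros above are only defined #ifndef so that an
     * architecture can pre-define them in its own headers when the dumped
     * layout of prstatus differs from the native in-kernel structure
     * (e.g. for compat dumps).
     */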
1537
1538static int fill_thread_core_info(struct elf_thread_core_info *t,
1539                                 const struct user_regset_view *view,
1540                                 long signr, size_t *total)
1541{
1542        unsigned int i;
1543
1544        /*
1545         * NT_PRSTATUS is the one special case, because the regset data
1546         * goes into the pr_reg field inside the note contents, rather
1547         * than being the whole note contents.  We fill the rest in here.
1548         * We assume that regset 0 is NT_PRSTATUS.
1549         */
1550        fill_prstatus(&t->prstatus, t->task, signr);
1551        (void) view->regsets[0].get(t->task, &view->regsets[0],
1552                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1553                                    PR_REG_PTR(&t->prstatus), NULL);
1554
1555        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1556                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1557        *total += notesize(&t->notes[0]);
1558
1559        do_thread_regset_writeback(t->task, &view->regsets[0]);
1560
1561        /*
1562         * Each other regset might generate a note too.  For each regset
1563         * that has no core_note_type or is inactive, we leave t->notes[i]
1564         * all zero and we'll know to skip writing it later.
1565         */
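            /*
             * Note the naming below: the traditional FPU note is emitted
             * under the "CORE" name with pr_fpvalid set so debuggers know
             * FPU state is present; every other regset note goes out under
             * the "LINUX" name.
             */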
1566        for (i = 1; i < view->n; ++i) {
1567                const struct user_regset *regset = &view->regsets[i];
1568                do_thread_regset_writeback(t->task, regset);
1569                if (regset->core_note_type && regset->get &&
1570                    (!regset->active || regset->active(t->task, regset))) {
1571                        int ret;
1572                        size_t size = regset->n * regset->size;
1573                        void *data = kmalloc(size, GFP_KERNEL);
1574                        if (unlikely(!data))
1575                                return 0;
1576                        ret = regset->get(t->task, regset,
1577                                          0, size, data, NULL);
1578                        if (unlikely(ret))
1579                                kfree(data);
1580                        else {
1581                                if (regset->core_note_type != NT_PRFPREG)
1582                                        fill_note(&t->notes[i], "LINUX",
1583                                                  regset->core_note_type,
1584                                                  size, data);
1585                                else {
1586                                        SET_PR_FPVALID(&t->prstatus, 1);
1587                                        fill_note(&t->notes[i], "CORE",
1588                                                  NT_PRFPREG, size, data);
1589                                }
1590                                *total += notesize(&t->notes[i]);
1591                        }
1592                }
1593        }
1594
1595        return 1;
1596}
1597
1598static int fill_note_info(struct elfhdr *elf, int phdrs,
1599                          struct elf_note_info *info,
1600                          siginfo_t *siginfo, struct pt_regs *regs)
1601{
1602        struct task_struct *dump_task = current;
1603        const struct user_regset_view *view = task_user_regset_view(dump_task);
1604        struct elf_thread_core_info *t;
1605        struct elf_prpsinfo *psinfo;
1606        struct core_thread *ct;
1607        unsigned int i;
1608
1609        info->size = 0;
1610        info->thread = NULL;
1611
1612        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1613        if (psinfo == NULL) {
1614                info->psinfo.data = NULL; /* So we don't free this wrongly */
1615                return 0;
1616        }
1617
1618        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1619
1620        /*
1621         * Figure out how many notes we're going to need for each thread.
1622         */
1623        info->thread_notes = 0;
1624        for (i = 0; i < view->n; ++i)
1625                if (view->regsets[i].core_note_type != 0)
1626                        ++info->thread_notes;
1627
1628        /*
1629         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1630         * since it is our one special case.
1631         */
1632        if (unlikely(info->thread_notes == 0) ||
1633            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1634                WARN_ON(1);
1635                return 0;
1636        }
1637
1638        /*
1639         * Initialize the ELF file header.
1640         */
1641        fill_elf_header(elf, phdrs,
1642                        view->e_machine, view->e_flags);
1643
1644        /*
1645         * Allocate a structure for each thread.
1646         */
1647        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1648                t = kzalloc(offsetof(struct elf_thread_core_info,
1649                                     notes[info->thread_notes]),
1650                            GFP_KERNEL);
1651                if (unlikely(!t))
1652                        return 0;
1653
1654                t->task = ct->task;
1655                if (ct->task == dump_task || !info->thread) {
1656                        t->next = info->thread;
1657                        info->thread = t;
1658                } else {
1659                        /*
1660                         * Make sure to keep the original task at
1661                         * the head of the list.
1662                         */
1663                        t->next = info->thread->next;
1664                        info->thread->next = t;
1665                }
1666        }
1667
1668        /*
1669         * Now fill in each thread's information.
1670         */
1671        for (t = info->thread; t != NULL; t = t->next)
1672                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1673                        return 0;
1674
1675        /*
1676         * Fill in the two process-wide notes.
1677         */
1678        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1679        info->size += notesize(&info->psinfo);
1680
1681        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1682        info->size += notesize(&info->signote);
1683
1684        fill_auxv_note(&info->auxv, current->mm);
1685        info->size += notesize(&info->auxv);
1686
1687        if (fill_files_note(&info->files) == 0)
1688                info->size += notesize(&info->files);
1689
1690        return 1;
1691}
1692
1693static size_t get_note_info_size(struct elf_note_info *info)
1694{
1695        return info->size;
1696}
1697
1698/*
1699 * Write all the notes for each thread.  When writing the first thread, the
1700 * process-wide notes are interleaved after the first thread-specific note.
1701 */
1702static int write_note_info(struct elf_note_info *info,
1703                           struct file *file, loff_t *foffset)
1704{
1705        bool first = true;
1706        struct elf_thread_core_info *t = info->thread;
1707
1708        do {
1709                int i;
1710
1711                if (!writenote(&t->notes[0], file, foffset))
1712                        return 0;
1713
1714                if (first && !writenote(&info->psinfo, file, foffset))
1715                        return 0;
1716                if (first && !writenote(&info->signote, file, foffset))
1717                        return 0;
1718                if (first && !writenote(&info->auxv, file, foffset))
1719                        return 0;
1720                if (first && info->files.data &&
                        !writenote(&info->files, file, foffset))
1721                        return 0;
1722
1723                for (i = 1; i < info->thread_notes; ++i)
1724                        if (t->notes[i].data &&
1725                            !writenote(&t->notes[i], file, foffset))
1726                                return 0;
1727
1728                first = false;
1729                t = t->next;
1730        } while (t);
1731
1732        return 1;
1733}
1734
1735static void free_note_info(struct elf_note_info *info)
1736{
1737        struct elf_thread_core_info *threads = info->thread;
1738        while (threads) {
1739                unsigned int i;
1740                struct elf_thread_core_info *t = threads;
1741                threads = t->next;
1742                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1743                for (i = 1; i < info->thread_notes; ++i)
1744                        kfree(t->notes[i].data);
1745                kfree(t);
1746        }
1747        kfree(info->psinfo.data);
1748        vfree(info->files.data);
1749}
1750
1751#else
1752
1753/* Here is the structure in which status of each thread is captured. */
1754struct elf_thread_status
1755{
1756        struct list_head list;
1757        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1758        elf_fpregset_t fpu;             /* NT_PRFPREG */
1759        struct task_struct *thread;
1760#ifdef ELF_CORE_COPY_XFPREGS
1761        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1762#endif
1763        struct memelfnote notes[3];
1764        int num_notes;
1765};
1766
1767/*
1768 * In order to add the specific thread information for the ELF file format,
1769 * we need to keep a linked list of every thread's pr_status and then create
1770 * a single section for them in the final core file.
1771 */
1772static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1773{
1774        int sz = 0;
1775        struct task_struct *p = t->thread;
1776        t->num_notes = 0;
1777
1778        fill_prstatus(&t->prstatus, p, signr);
1779        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1780        
1781        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1782                  &(t->prstatus));
1783        t->num_notes++;
1784        sz += notesize(&t->notes[0]);
1785
1786        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1787                                                                &t->fpu))) {
1788                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1789                          &(t->fpu));
1790                t->num_notes++;
1791                sz += notesize(&t->notes[1]);
1792        }
1793
1794#ifdef ELF_CORE_COPY_XFPREGS
1795        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1796                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1797                          sizeof(t->xfpu), &t->xfpu);
1798                t->num_notes++;
1799                sz += notesize(&t->notes[2]);
1800        }
1801#endif  
1802        return sz;
1803}
1804
1805struct elf_note_info {
1806        struct memelfnote *notes;
            struct memelfnote *notes_files;
1807        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1808        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1809        struct list_head thread_list;
1810        elf_fpregset_t *fpu;
1811#ifdef ELF_CORE_COPY_XFPREGS
1812        elf_fpxregset_t *xfpu;
1813#endif
1814        user_siginfo_t csigdata;
1815        int thread_status_size;
1816        int numnote;
1817};
1818
1819static int elf_note_info_init(struct elf_note_info *info)
1820{
1821        memset(info, 0, sizeof(*info));
1822        INIT_LIST_HEAD(&info->thread_list);
1823
1824        /* Allocate space for ELF notes */
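            /*
             * Eight slots cover the five fixed notes (prstatus, psinfo,
             * siginfo, auxv, files) plus NT_PRFPREG and the optional
             * arch-specific XFPREG note.
             */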
1825        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1826        if (!info->notes)
1827                return 0;
1828        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1829        if (!info->psinfo)
1830                return 0;
1831        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1832        if (!info->prstatus)
1833                return 0;
1834        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1835        if (!info->fpu)
1836                return 0;
1837#ifdef ELF_CORE_COPY_XFPREGS
1838        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1839        if (!info->xfpu)
1840                return 0;
1841#endif
1842        return 1;
1843}
1844
1845static int fill_note_info(struct elfhdr *elf, int phdrs,
1846                          struct elf_note_info *info,
1847                          siginfo_t *siginfo, struct pt_regs *regs)
1848{
1849        struct list_head *t;
1850
1851        if (!elf_note_info_init(info))
1852                return 0;
1853
1854        if (siginfo->si_signo) {
1855                struct core_thread *ct;
1856                struct elf_thread_status *ets;
1857
1858                for (ct = current->mm->core_state->dumper.next;
1859                                                ct; ct = ct->next) {
1860                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1861                        if (!ets)
1862                                return 0;
1863
1864                        ets->thread = ct->task;
1865                        list_add(&ets->list, &info->thread_list);
1866                }
1867
1868                list_for_each(t, &info->thread_list) {
1869                        int sz;
1870
1871                        ets = list_entry(t, struct elf_thread_status, list);
1872                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1873                        info->thread_status_size += sz;
1874                }
1875        }
1876        /* now collect the dump for the current task */
1877        memset(info->prstatus, 0, sizeof(*info->prstatus));
1878        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1879        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1880
1881        /* Set up header */
1882        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1883
1884        /*
1885         * Set up the notes in similar form to SVR4 core dumps made
1886         * with info from their /proc.
1887         */
1888
1889        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1890                  sizeof(*info->prstatus), info->prstatus);
1891        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1892        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1893                  sizeof(*info->psinfo), info->psinfo);
1894
1895        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1896        fill_auxv_note(info->notes + 3, current->mm);
1897        info->numnote = 4;
1898
1899        if (fill_files_note(info->notes + info->numnote) == 0) {
                    info->notes_files = info->notes + info->numnote;
                    info->numnote++;
            }
1900
1901        /* Try to dump the FPU. */
1902        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1903                                                               info->fpu);
1904        if (info->prstatus->pr_fpvalid)
1905                fill_note(info->notes + info->numnote++,
1906                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1907#ifdef ELF_CORE_COPY_XFPREGS
1908        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1909                fill_note(info->notes + info->numnote++,
1910                          "LINUX", ELF_CORE_XFPREG_TYPE,
1911                          sizeof(*info->xfpu), info->xfpu);
1912#endif
1913
1914        return 1;
1915}
1916
1917static size_t get_note_info_size(struct elf_note_info *info)
1918{
1919        int sz = 0;
1920        int i;
1921
1922        for (i = 0; i < info->numnote; i++)
1923                sz += notesize(info->notes + i);
1924
1925        sz += info->thread_status_size;
1926
1927        return sz;
1928}
1929
1930static int write_note_info(struct elf_note_info *info,
1931                           struct file *file, loff_t *foffset)
1932{
1933        int i;
1934        struct list_head *t;
1935
1936        for (i = 0; i < info->numnote; i++)
1937                if (!writenote(info->notes + i, file, foffset))
1938                        return 0;
1939
1940        /* write out the thread status notes section */
1941        list_for_each(t, &info->thread_list) {
1942                struct elf_thread_status *tmp =
1943                                list_entry(t, struct elf_thread_status, list);
1944
1945                for (i = 0; i < tmp->num_notes; i++)
1946                        if (!writenote(&tmp->notes[i], file, foffset))
1947                                return 0;
1948        }
1949
1950        return 1;
1951}
1952
1953static void free_note_info(struct elf_note_info *info)
1954{
1955        while (!list_empty(&info->thread_list)) {
1956                struct list_head *tmp = info->thread_list.next;
1957                list_del(tmp);
1958                kfree(list_entry(tmp, struct elf_thread_status, list));
1959        }
1960
1961        /* Free data allocated by fill_files_note(), if any: */
1962        if (info->notes_files)
                    vfree(info->notes_files->data);
1963
1964        kfree(info->prstatus);
1965        kfree(info->psinfo);
1966        kfree(info->notes);
1967        kfree(info->fpu);
1968#ifdef ELF_CORE_COPY_XFPREGS
1969        kfree(info->xfpu);
1970#endif
1971}
1972
1973#endif
1974
1975static struct vm_area_struct *first_vma(struct task_struct *tsk,
1976                                        struct vm_area_struct *gate_vma)
1977{
1978        struct vm_area_struct *ret = tsk->mm->mmap;
1979
1980        if (ret)
1981                return ret;
1982        return gate_vma;
1983}
1984/*
1985 * Helper function for iterating across a vma list.  It ensures that the caller
1986 * will visit `gate_vma' prior to terminating the search.
1987 */
1988static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1989                                        struct vm_area_struct *gate_vma)
1990{
1991        struct vm_area_struct *ret;
1992
1993        ret = this_vma->vm_next;
1994        if (ret)
1995                return ret;
1996        if (this_vma == gate_vma)
1997                return NULL;
1998        return gate_vma;
1999}
2000
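    /*
     * ELF extended numbering: when the true program-header count does not
     * fit in the 16-bit e_phnum field, e_phnum is set to PN_XNUM and the
     * real count is stashed in the sh_info field of a lone placeholder
     * section header, which is what gets filled in here.
     */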
2001static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2002                             elf_addr_t e_shoff, int segs)
2003{
2004        elf->e_shoff = e_shoff;
2005        elf->e_shentsize = sizeof(*shdr4extnum);
2006        elf->e_shnum = 1;
2007        elf->e_shstrndx = SHN_UNDEF;
2008
2009        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2010
2011        shdr4extnum->sh_type = SHT_NULL;
2012        shdr4extnum->sh_size = elf->e_shnum;
2013        shdr4extnum->sh_link = elf->e_shstrndx;
2014        shdr4extnum->sh_info = segs;
2015}
2016
2017static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2018                                     unsigned long mm_flags)
2019{
2020        struct vm_area_struct *vma;
2021        size_t size = 0;
2022
2023        for (vma = first_vma(current, gate_vma); vma != NULL;
2024             vma = next_vma(vma, gate_vma))
2025                size += vma_dump_size(vma, mm_flags);
2026        return size;
2027}
2028
2029/*
2030 * Actual dumper
2031 *
2032 * This is a two-pass process; first we find the offsets of the bits,
2033 * and then they are actually written out.  If we run out of core limit
2034 * we just truncate.
2035 */
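    /*
     * A rough sketch of the resulting file layout (bracketed parts are
     * optional, depending on the architecture and on e_phnum):
     *
     *     ELF header
     *     program headers: one PT_NOTE + one PT_LOAD per vma [+ extras]
     *     note data
     *     ...padding up to ELF_EXEC_PAGESIZE...
     *     vma contents, dumped a page at a time
     *     [extra arch-specific data]
     *     [placeholder section header, when e_phnum == PN_XNUM]
     */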
2036static int elf_core_dump(struct coredump_params *cprm)
2037{
2038        int has_dumped = 0;
2039        mm_segment_t fs;
2040        int segs;
2041        size_t size = 0;
2042        struct vm_area_struct *vma, *gate_vma;
2043        struct elfhdr *elf = NULL;
2044        loff_t offset = 0, dataoff, foffset;
2045        struct elf_note_info info = { };
2046        struct elf_phdr *phdr4note = NULL;
2047        struct elf_shdr *shdr4extnum = NULL;
2048        Elf_Half e_phnum;
2049        elf_addr_t e_shoff;
2050
2051        /*
2052         * We no longer stop all VM operations.
2053         *
2054         * This is because those processes that could possibly change the
2055         * map_count or the mmap / vma pages are now blocked in do_exit until
2056         * current has finished this core dump.
2057         *
2058         * Only ptrace can touch these memory addresses, but it doesn't change
2059         * the map_count or the pages allocated, so there is no possibility of
2060         * crashing while dumping the mm->vm_next areas to the core file.
2061         */
2062  
2063        /* alloc memory for large data structures: too large to be on stack */
2064        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2065        if (!elf)
2066                goto out;
2067        /*
2068         * The number of segs is recorded in the ELF header as a 16-bit value.
2069         * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying here.
2070         */
2071        segs = current->mm->map_count;
2072        segs += elf_core_extra_phdrs();
2073
2074        gate_vma = get_gate_vma(current->mm);
2075        if (gate_vma != NULL)
2076                segs++;
2077
2078        /* for notes section */
2079        segs++;
2080
2081        /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2082         * this, the kernel supports extended numbering. Have a look at
2083         * include/linux/elf.h for further information. */
2084        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2085
2086        /*
2087         * Collect all the non-memory information about the process for the
2088         * notes.  This also sets up the file header.
2089         */
2090        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2091                goto cleanup;
2092
2093        has_dumped = 1;
2094        current->flags |= PF_DUMPCORE;
2095  
2096        fs = get_fs();
2097        set_fs(KERNEL_DS);
2098
2099        offset += sizeof(*elf);                         /* Elf header */
2100        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2101        foffset = offset;
2102
2103        /* Write notes phdr entry */
2104        {
2105                size_t sz = get_note_info_size(&info);
2106
2107                sz += elf_coredump_extra_notes_size();
2108
2109                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2110                if (!phdr4note)
2111                        goto end_coredump;
2112
2113                fill_elf_note_phdr(phdr4note, sz, offset);
2114                offset += sz;
2115        }
2116
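            /*
             * Segment data starts on an ELF page boundary, which keeps each
             * PT_LOAD's p_offset congruent with its p_vaddr modulo the page
             * size, so tools can mmap the core file directly.
             */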
2117        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2118
2119        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2120        offset += elf_core_extra_data_size();
2121        e_shoff = offset;
2122
2123        if (e_phnum == PN_XNUM) {
2124                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2125                if (!shdr4extnum)
2126                        goto end_coredump;
2127                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2128        }
2129
2130        offset = dataoff;
2131
2132        size += sizeof(*elf);
2133        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2134                goto end_coredump;
2135
2136        size += sizeof(*phdr4note);
2137        if (size > cprm->limit
2138            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2139                goto end_coredump;
2140
2141        /* Write program headers for segments dump */
2142        for (vma = first_vma(current, gate_vma); vma != NULL;
2143                        vma = next_vma(vma, gate_vma)) {
2144                struct elf_phdr phdr;
2145
2146                phdr.p_type = PT_LOAD;
2147                phdr.p_offset = offset;
2148                phdr.p_vaddr = vma->vm_start;
2149                phdr.p_paddr = 0;
2150                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2151                phdr.p_memsz = vma->vm_end - vma->vm_start;
2152                offset += phdr.p_filesz;
2153                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2154                if (vma->vm_flags & VM_WRITE)
2155                        phdr.p_flags |= PF_W;
2156                if (vma->vm_flags & VM_EXEC)
2157                        phdr.p_flags |= PF_X;
2158                phdr.p_align = ELF_EXEC_PAGESIZE;
2159
2160                size += sizeof(phdr);
2161                if (size > cprm->limit
2162                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2163                        goto end_coredump;
2164        }
2165
2166        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2167                goto end_coredump;
2168
2169        /* write out the notes section */
2170        if (!write_note_info(&info, cprm->file, &foffset))
2171                goto end_coredump;
2172
2173        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2174                goto end_coredump;
2175
2176        /* Align to page */
2177        if (!dump_seek(cprm->file, dataoff - foffset))
2178                goto end_coredump;
2179
2180        for (vma = first_vma(current, gate_vma); vma != NULL;
2181                        vma = next_vma(vma, gate_vma)) {
2182                unsigned long addr;
2183                unsigned long end;
2184
2185                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2186
2187                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2188                        struct page *page;
2189                        int stop;
2190
2191                        page = get_dump_page(addr);
2192                        if (page) {
2193                                void *kaddr = kmap(page);
2194                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2195                                        !dump_write(cprm->file, kaddr,
2196                                                    PAGE_SIZE);
2197                                kunmap(page);
2198                                page_cache_release(page);
2199                        } else
2200                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2201                        if (stop)
2202                                goto end_coredump;
2203                }
2204        }
2205
2206        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2207                goto end_coredump;
2208
2209        if (e_phnum == PN_XNUM) {
2210                size += sizeof(*shdr4extnum);
2211                if (size > cprm->limit
2212                    || !dump_write(cprm->file, shdr4extnum,
2213                                   sizeof(*shdr4extnum)))
2214                        goto end_coredump;
2215        }
2216
2217end_coredump:
2218        set_fs(fs);
2219
2220cleanup:
2221        free_note_info(&info);
2222        kfree(shdr4extnum);
2223        kfree(phdr4note);
2224        kfree(elf);
2225out:
2226        return has_dumped;
2227}
2228
2229#endif          /* CONFIG_ELF_CORE */
2230
2231static int __init init_elf_binfmt(void)
2232{
2233        register_binfmt(&elf_format);
2234        return 0;
2235}
2236
2237static void __exit exit_elf_binfmt(void)
2238{
2239        /* Remove the ELF loader. */
2240        unregister_binfmt(&elf_format);
2241}
2242
2243core_initcall(init_elf_binfmt);
2244module_exit(exit_elf_binfmt);
2245MODULE_LICENSE("GPL");
2246