linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/mm.h>
  16#include <linux/mman.h>
  17#include <linux/errno.h>
  18#include <linux/signal.h>
  19#include <linux/binfmts.h>
  20#include <linux/string.h>
  21#include <linux/file.h>
  22#include <linux/slab.h>
  23#include <linux/personality.h>
  24#include <linux/elfcore.h>
  25#include <linux/init.h>
  26#include <linux/highuid.h>
  27#include <linux/compiler.h>
  28#include <linux/highmem.h>
  29#include <linux/pagemap.h>
  30#include <linux/security.h>
  31#include <linux/random.h>
  32#include <linux/elf.h>
  33#include <linux/utsname.h>
  34#include <asm/uaccess.h>
  35#include <asm/param.h>
  36#include <asm/page.h>
  37
  38static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  39static int load_elf_library(struct file *);
  40static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  41                                int, int, unsigned long);
  42
  43/*
  44 * If we don't support core dumping, then supply a NULL so we
  45 * don't even try.
  46 */
  47#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  48static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
  49#else
  50#define elf_core_dump   NULL
  51#endif
  52
  53#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  54#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  55#else
  56#define ELF_MIN_ALIGN   PAGE_SIZE
  57#endif
  58
  59#ifndef ELF_CORE_EFLAGS
  60#define ELF_CORE_EFLAGS 0
  61#endif
  62
  63#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  64#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  65#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  66
  67static struct linux_binfmt elf_format = {
  68                .module         = THIS_MODULE,
  69                .load_binary    = load_elf_binary,
  70                .load_shlib     = load_elf_library,
  71                .core_dump      = elf_core_dump,
  72                .min_coredump   = ELF_EXEC_PAGESIZE,
  73                .hasvdso        = 1
  74};
  75
  76#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  77
  78static int set_brk(unsigned long start, unsigned long end)
  79{
  80        start = ELF_PAGEALIGN(start);
  81        end = ELF_PAGEALIGN(end);
  82        if (end > start) {
  83                unsigned long addr;
  84                down_write(&current->mm->mmap_sem);
  85                addr = do_brk(start, end - start);
  86                up_write(&current->mm->mmap_sem);
  87                if (BAD_ADDR(addr))
  88                        return addr;
  89        }
  90        current->mm->start_brk = current->mm->brk = end;
  91        return 0;
  92}
  93
  94/* We need to explicitly zero any fractional pages
  95   after the data section (i.e. bss).  This would
  96   contain the junk from the file that should not
  97   be in memory
  98 */
  99static int padzero(unsigned long elf_bss)
 100{
 101        unsigned long nbyte;
 102
 103        nbyte = ELF_PAGEOFFSET(elf_bss);
 104        if (nbyte) {
 105                nbyte = ELF_MIN_ALIGN - nbyte;
 106                if (clear_user((void __user *) elf_bss, nbyte))
 107                        return -EFAULT;
 108        }
 109        return 0;
 110}
 111
 112/* Let's use some macros to make this stack manipulation a little clearer */
 113#ifdef CONFIG_STACK_GROWSUP
 114#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 115#define STACK_ROUND(sp, items) \
 116        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 117#define STACK_ALLOC(sp, len) ({ \
 118        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
 119        old_sp; })
 120#else
 121#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 122#define STACK_ROUND(sp, items) \
 123        (((unsigned long) (sp - items)) &~ 15UL)
 124#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 125#endif
 126
 127#ifndef ELF_BASE_PLATFORM
 128/*
 129 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 130 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 131 * will be copied to the user stack in the same manner as AT_PLATFORM.
 132 */
 133#define ELF_BASE_PLATFORM NULL
 134#endif
 135
 136static int
 137create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 138                unsigned long load_addr, unsigned long interp_load_addr)
 139{
 140        unsigned long p = bprm->p;
 141        int argc = bprm->argc;
 142        int envc = bprm->envc;
 143        elf_addr_t __user *argv;
 144        elf_addr_t __user *envp;
 145        elf_addr_t __user *sp;
 146        elf_addr_t __user *u_platform;
 147        elf_addr_t __user *u_base_platform;
 148        elf_addr_t __user *u_rand_bytes;
 149        const char *k_platform = ELF_PLATFORM;
 150        const char *k_base_platform = ELF_BASE_PLATFORM;
 151        unsigned char k_rand_bytes[16];
 152        int items;
 153        elf_addr_t *elf_info;
 154        int ei_index = 0;
 155        const struct cred *cred = current_cred();
 156        struct vm_area_struct *vma;
 157
 158        /*
 159         * In some cases (e.g. Hyper-Threading), we want to avoid L1
 160         * evictions by the processes running on the same package. One
 161         * thing we can do is to shuffle the initial stack for them.
 162         */
 163
 164        p = arch_align_stack(p);
 165
 166        /*
 167         * If this architecture has a platform capability string, copy it
 168         * to userspace.  In some cases (Sparc), this info is impossible
 169         * for userspace to get any other way, in others (i386) it is
 170         * merely difficult.
 171         */
 172        u_platform = NULL;
 173        if (k_platform) {
 174                size_t len = strlen(k_platform) + 1;
 175
 176                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 177                if (__copy_to_user(u_platform, k_platform, len))
 178                        return -EFAULT;
 179        }
 180
 181        /*
 182         * If this architecture has a "base" platform capability
 183         * string, copy it to userspace.
 184         */
 185        u_base_platform = NULL;
 186        if (k_base_platform) {
 187                size_t len = strlen(k_base_platform) + 1;
 188
 189                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 190                if (__copy_to_user(u_base_platform, k_base_platform, len))
 191                        return -EFAULT;
 192        }
 193
 194        /*
 195         * Generate 16 random bytes for userspace PRNG seeding.
 196         */
 197        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
 198        u_rand_bytes = (elf_addr_t __user *)
 199                       STACK_ALLOC(p, sizeof(k_rand_bytes));
 200        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
 201                return -EFAULT;
 202
 203        /* Create the ELF interpreter info */
 204        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 205        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
 206#define NEW_AUX_ENT(id, val) \
 207        do { \
 208                elf_info[ei_index++] = id; \
 209                elf_info[ei_index++] = val; \
 210        } while (0)
 211
 212#ifdef ARCH_DLINFO
 213        /* 
 214         * ARCH_DLINFO must come first so PPC can do its special alignment of
 215         * AUXV.
 216         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
 217         * ARCH_DLINFO changes
 218         */
 219        ARCH_DLINFO;
 220#endif
 221        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 222        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 223        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 224        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 225        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 226        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 227        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 228        NEW_AUX_ENT(AT_FLAGS, 0);
 229        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 230        NEW_AUX_ENT(AT_UID, cred->uid);
 231        NEW_AUX_ENT(AT_EUID, cred->euid);
 232        NEW_AUX_ENT(AT_GID, cred->gid);
 233        NEW_AUX_ENT(AT_EGID, cred->egid);
 234        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 235        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
 236        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 237        if (k_platform) {
 238                NEW_AUX_ENT(AT_PLATFORM,
 239                            (elf_addr_t)(unsigned long)u_platform);
 240        }
 241        if (k_base_platform) {
 242                NEW_AUX_ENT(AT_BASE_PLATFORM,
 243                            (elf_addr_t)(unsigned long)u_base_platform);
 244        }
 245        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 246                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 247        }
 248#undef NEW_AUX_ENT
 249        /* AT_NULL is zero; clear the rest too */
 250        memset(&elf_info[ei_index], 0,
 251               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 252
 253        /* And advance past the AT_NULL entry.  */
 254        ei_index += 2;
 255
 256        sp = STACK_ADD(p, ei_index);
 257
 258        items = (argc + 1) + (envc + 1) + 1;
 259        bprm->p = STACK_ROUND(sp, items);
 260
 261        /* Point sp at the lowest address on the stack */
 262#ifdef CONFIG_STACK_GROWSUP
 263        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 264        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 265#else
 266        sp = (elf_addr_t __user *)bprm->p;
 267#endif
 268
 269
 270        /*
 271         * Grow the stack manually; some architectures have a limit on how
 272         * far ahead a user-space access may be in order to grow the stack.
 273         */
 274        vma = find_extend_vma(current->mm, bprm->p);
 275        if (!vma)
 276                return -EFAULT;
 277
 278        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 279        if (__put_user(argc, sp++))
 280                return -EFAULT;
 281        argv = sp;
 282        envp = argv + argc + 1;
 283
 284        /* Populate argv and envp */
 285        p = current->mm->arg_end = current->mm->arg_start;
 286        while (argc-- > 0) {
 287                size_t len;
 288                if (__put_user((elf_addr_t)p, argv++))
 289                        return -EFAULT;
 290                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 291                if (!len || len > MAX_ARG_STRLEN)
 292                        return -EINVAL;
 293                p += len;
 294        }
 295        if (__put_user(0, argv))
 296                return -EFAULT;
 297        current->mm->arg_end = current->mm->env_start = p;
 298        while (envc-- > 0) {
 299                size_t len;
 300                if (__put_user((elf_addr_t)p, envp++))
 301                        return -EFAULT;
 302                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 303                if (!len || len > MAX_ARG_STRLEN)
 304                        return -EINVAL;
 305                p += len;
 306        }
 307        if (__put_user(0, envp))
 308                return -EFAULT;
 309        current->mm->env_end = p;
 310
 311        /* Put the elf_info on the stack in the right place.  */
 312        sp = (elf_addr_t __user *)envp + 1;
 313        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 314                return -EFAULT;
 315        return 0;
 316}
 317
 318#ifndef elf_map
 319
 320static unsigned long elf_map(struct file *filep, unsigned long addr,
 321                struct elf_phdr *eppnt, int prot, int type,
 322                unsigned long total_size)
 323{
 324        unsigned long map_addr;
 325        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 326        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
 327        addr = ELF_PAGESTART(addr);
 328        size = ELF_PAGEALIGN(size);
 329
 330        /* mmap() will return -EINVAL if given a zero size, but a
 331         * segment with zero filesize is perfectly valid */
 332        if (!size)
 333                return addr;
 334
 335        down_write(&current->mm->mmap_sem);
 336        /*
 337        * total_size is the size of the ELF (interpreter) image.
 338        * The _first_ mmap needs to know the full size, otherwise
 339        * randomization might put this image into an overlapping
 340        * position with the ELF binary image. (since size < total_size)
 341        * So we first map the 'big' image - and unmap the remainder at
 342        * the end. (which unmap is needed for ELF images with holes.)
 343        */
 344        if (total_size) {
 345                total_size = ELF_PAGEALIGN(total_size);
 346                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
 347                if (!BAD_ADDR(map_addr))
 348                        do_munmap(current->mm, map_addr+size, total_size-size);
 349        } else
 350                map_addr = do_mmap(filep, addr, size, prot, type, off);
 351
 352        up_write(&current->mm->mmap_sem);
 353        return(map_addr);
 354}
 355
 356#endif /* !elf_map */
 357
 358static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 359{
 360        int i, first_idx = -1, last_idx = -1;
 361
 362        for (i = 0; i < nr; i++) {
 363                if (cmds[i].p_type == PT_LOAD) {
 364                        last_idx = i;
 365                        if (first_idx == -1)
 366                                first_idx = i;
 367                }
 368        }
 369        if (first_idx == -1)
 370                return 0;
 371
 372        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 373                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
 374}
 375
 376
 377/* This is much more generalized than the library routine read function,
 378   so we keep this separate.  Technically the library read function
 379   is only provided so that we can read a.out libraries that have
 380   an ELF header */
 381
 382static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 383                struct file *interpreter, unsigned long *interp_map_addr,
 384                unsigned long no_base)
 385{
 386        struct elf_phdr *elf_phdata;
 387        struct elf_phdr *eppnt;
 388        unsigned long load_addr = 0;
 389        int load_addr_set = 0;
 390        unsigned long last_bss = 0, elf_bss = 0;
 391        unsigned long error = ~0UL;
 392        unsigned long total_size;
 393        int retval, i, size;
 394
 395        /* First of all, some simple consistency checks */
 396        if (interp_elf_ex->e_type != ET_EXEC &&
 397            interp_elf_ex->e_type != ET_DYN)
 398                goto out;
 399        if (!elf_check_arch(interp_elf_ex))
 400                goto out;
 401        if (!interpreter->f_op || !interpreter->f_op->mmap)
 402                goto out;
 403
 404        /*
 405         * If the size of this structure has changed, then punt, since
 406         * we will be doing the wrong thing.
 407         */
 408        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 409                goto out;
 410        if (interp_elf_ex->e_phnum < 1 ||
 411                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 412                goto out;
 413
 414        /* Now read in all of the header information */
 415        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 416        if (size > ELF_MIN_ALIGN)
 417                goto out;
 418        elf_phdata = kmalloc(size, GFP_KERNEL);
 419        if (!elf_phdata)
 420                goto out;
 421
 422        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 423                             (char *)elf_phdata,size);
 424        error = -EIO;
 425        if (retval != size) {
 426                if (retval < 0)
 427                        error = retval; 
 428                goto out_close;
 429        }
 430
 431        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
 432        if (!total_size) {
 433                error = -EINVAL;
 434                goto out_close;
 435        }
 436
 437        eppnt = elf_phdata;
 438        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 439                if (eppnt->p_type == PT_LOAD) {
 440                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 441                        int elf_prot = 0;
 442                        unsigned long vaddr = 0;
 443                        unsigned long k, map_addr;
 444
 445                        if (eppnt->p_flags & PF_R)
 446                                elf_prot = PROT_READ;
 447                        if (eppnt->p_flags & PF_W)
 448                                elf_prot |= PROT_WRITE;
 449                        if (eppnt->p_flags & PF_X)
 450                                elf_prot |= PROT_EXEC;
 451                        vaddr = eppnt->p_vaddr;
 452                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 453                                elf_type |= MAP_FIXED;
 454                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
 455                                load_addr = -vaddr;
 456
 457                        map_addr = elf_map(interpreter, load_addr + vaddr,
 458                                        eppnt, elf_prot, elf_type, total_size);
 459                        total_size = 0;
 460                        if (!*interp_map_addr)
 461                                *interp_map_addr = map_addr;
 462                        error = map_addr;
 463                        if (BAD_ADDR(map_addr))
 464                                goto out_close;
 465
 466                        if (!load_addr_set &&
 467                            interp_elf_ex->e_type == ET_DYN) {
 468                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 469                                load_addr_set = 1;
 470                        }
 471
 472                        /*
 473                         * Check to see if the section's size will overflow the
 474                         * allowed task size. Note that p_filesz must always be
 475                         * <= p_memsize so it's only necessary to check p_memsz.
 476                         */
 477                        k = load_addr + eppnt->p_vaddr;
 478                        if (BAD_ADDR(k) ||
 479                            eppnt->p_filesz > eppnt->p_memsz ||
 480                            eppnt->p_memsz > TASK_SIZE ||
 481                            TASK_SIZE - eppnt->p_memsz < k) {
 482                                error = -ENOMEM;
 483                                goto out_close;
 484                        }
 485
 486                        /*
 487                         * Find the end of the file mapping for this phdr, and
 488                         * keep track of the largest address we see for this.
 489                         */
 490                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 491                        if (k > elf_bss)
 492                                elf_bss = k;
 493
 494                        /*
 495                         * Do the same thing for the memory mapping - between
 496                         * elf_bss and last_bss is the bss section.
 497                         */
 498                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 499                        if (k > last_bss)
 500                                last_bss = k;
 501                }
 502        }
 503
 504        /*
 505         * Now fill out the bss section.  First pad the last page up
 506         * to the page boundary, and then perform a mmap to make sure
 507         * that there are zero-mapped pages up to and including the 
 508         * last bss page.
 509         */
 510        if (padzero(elf_bss)) {
 511                error = -EFAULT;
 512                goto out_close;
 513        }
 514
 515        /* What we have mapped so far */
 516        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 517
 518        /* Map the last of the bss segment */
 519        if (last_bss > elf_bss) {
 520                down_write(&current->mm->mmap_sem);
 521                error = do_brk(elf_bss, last_bss - elf_bss);
 522                up_write(&current->mm->mmap_sem);
 523                if (BAD_ADDR(error))
 524                        goto out_close;
 525        }
 526
 527        error = load_addr;
 528
 529out_close:
 530        kfree(elf_phdata);
 531out:
 532        return error;
 533}
 534
 535/*
 536 * These are the functions used to load ELF style executables and shared
 537 * libraries.  There is no binary dependent code anywhere else.
 538 */
 539
 540#define INTERPRETER_NONE 0
 541#define INTERPRETER_ELF 2
 542
 543#ifndef STACK_RND_MASK
 544#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
 545#endif
 546
 547static unsigned long randomize_stack_top(unsigned long stack_top)
 548{
 549        unsigned int random_variable = 0;
 550
 551        if ((current->flags & PF_RANDOMIZE) &&
 552                !(current->personality & ADDR_NO_RANDOMIZE)) {
 553                random_variable = get_random_int() & STACK_RND_MASK;
 554                random_variable <<= PAGE_SHIFT;
 555        }
 556#ifdef CONFIG_STACK_GROWSUP
 557        return PAGE_ALIGN(stack_top) + random_variable;
 558#else
 559        return PAGE_ALIGN(stack_top) - random_variable;
 560#endif
 561}
 562
 563static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 564{
 565        struct file *interpreter = NULL; /* to shut gcc up */
 566        unsigned long load_addr = 0, load_bias = 0;
 567        int load_addr_set = 0;
 568        char * elf_interpreter = NULL;
 569        unsigned long error;
 570        struct elf_phdr *elf_ppnt, *elf_phdata;
 571        unsigned long elf_bss, elf_brk;
 572        int retval, i;
 573        unsigned int size;
 574        unsigned long elf_entry;
 575        unsigned long interp_load_addr = 0;
 576        unsigned long start_code, end_code, start_data, end_data;
 577        unsigned long reloc_func_desc = 0;
 578        int executable_stack = EXSTACK_DEFAULT;
 579        unsigned long def_flags = 0;
 580        struct {
 581                struct elfhdr elf_ex;
 582                struct elfhdr interp_elf_ex;
 583        } *loc;
 584
 585        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 586        if (!loc) {
 587                retval = -ENOMEM;
 588                goto out_ret;
 589        }
 590        
 591        /* Get the exec-header */
 592        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 593
 594        retval = -ENOEXEC;
 595        /* First of all, some simple consistency checks */
 596        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 597                goto out;
 598
 599        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 600                goto out;
 601        if (!elf_check_arch(&loc->elf_ex))
 602                goto out;
 603        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 604                goto out;
 605
 606        /* Now read in all of the header information */
 607        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 608                goto out;
 609        if (loc->elf_ex.e_phnum < 1 ||
 610                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 611                goto out;
 612        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 613        retval = -ENOMEM;
 614        elf_phdata = kmalloc(size, GFP_KERNEL);
 615        if (!elf_phdata)
 616                goto out;
 617
 618        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 619                             (char *)elf_phdata, size);
 620        if (retval != size) {
 621                if (retval >= 0)
 622                        retval = -EIO;
 623                goto out_free_ph;
 624        }
 625
 626        elf_ppnt = elf_phdata;
 627        elf_bss = 0;
 628        elf_brk = 0;
 629
 630        start_code = ~0UL;
 631        end_code = 0;
 632        start_data = 0;
 633        end_data = 0;
 634
 635        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 636                if (elf_ppnt->p_type == PT_INTERP) {
 637                        /* This is the program interpreter used for
 638                         * shared libraries - for now assume that this
 639                         * is an a.out format binary
 640                         */
 641                        retval = -ENOEXEC;
 642                        if (elf_ppnt->p_filesz > PATH_MAX || 
 643                            elf_ppnt->p_filesz < 2)
 644                                goto out_free_ph;
 645
 646                        retval = -ENOMEM;
 647                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 648                                                  GFP_KERNEL);
 649                        if (!elf_interpreter)
 650                                goto out_free_ph;
 651
 652                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 653                                             elf_interpreter,
 654                                             elf_ppnt->p_filesz);
 655                        if (retval != elf_ppnt->p_filesz) {
 656                                if (retval >= 0)
 657                                        retval = -EIO;
 658                                goto out_free_interp;
 659                        }
 660                        /* make sure path is NULL terminated */
 661                        retval = -ENOEXEC;
 662                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 663                                goto out_free_interp;
 664
 665                        /*
 666                         * The early SET_PERSONALITY here is so that the lookup
 667                         * for the interpreter happens in the namespace of the 
 668                         * to-be-execed image.  SET_PERSONALITY can select an
 669                         * alternate root.
 670                         *
 671                         * However, SET_PERSONALITY is NOT allowed to switch
 672                         * this task into the new images's memory mapping
 673                         * policy - that is, TASK_SIZE must still evaluate to
 674                         * that which is appropriate to the execing application.
 675                         * This is because exit_mmap() needs to have TASK_SIZE
 676                         * evaluate to the size of the old image.
 677                         *
 678                         * So if (say) a 64-bit application is execing a 32-bit
 679                         * application it is the architecture's responsibility
 680                         * to defer changing the value of TASK_SIZE until the
 681                         * switch really is going to happen - do this in
 682                         * flush_thread().      - akpm
 683                         */
 684                        SET_PERSONALITY(loc->elf_ex);
 685
 686                        interpreter = open_exec(elf_interpreter);
 687                        retval = PTR_ERR(interpreter);
 688                        if (IS_ERR(interpreter))
 689                                goto out_free_interp;
 690
 691                        /*
 692                         * If the binary is not readable then enforce
 693                         * mm->dumpable = 0 regardless of the interpreter's
 694                         * permissions.
 695                         */
 696                        if (file_permission(interpreter, MAY_READ) < 0)
 697                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 698
 699                        retval = kernel_read(interpreter, 0, bprm->buf,
 700                                             BINPRM_BUF_SIZE);
 701                        if (retval != BINPRM_BUF_SIZE) {
 702                                if (retval >= 0)
 703                                        retval = -EIO;
 704                                goto out_free_dentry;
 705                        }
 706
 707                        /* Get the exec headers */
 708                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 709                        break;
 710                }
 711                elf_ppnt++;
 712        }
 713
 714        elf_ppnt = elf_phdata;
 715        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 716                if (elf_ppnt->p_type == PT_GNU_STACK) {
 717                        if (elf_ppnt->p_flags & PF_X)
 718                                executable_stack = EXSTACK_ENABLE_X;
 719                        else
 720                                executable_stack = EXSTACK_DISABLE_X;
 721                        break;
 722                }
 723
 724        /* Some simple consistency checks for the interpreter */
 725        if (elf_interpreter) {
 726                retval = -ELIBBAD;
 727                /* Not an ELF interpreter */
 728                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 729                        goto out_free_dentry;
 730                /* Verify the interpreter has a valid arch */
 731                if (!elf_check_arch(&loc->interp_elf_ex))
 732                        goto out_free_dentry;
 733        } else {
 734                /* Executables without an interpreter also need a personality  */
 735                SET_PERSONALITY(loc->elf_ex);
 736        }
 737
 738        /* Flush all traces of the currently running executable */
 739        retval = flush_old_exec(bprm);
 740        if (retval)
 741                goto out_free_dentry;
 742
 743        /* OK, This is the point of no return */
 744        current->flags &= ~PF_FORKNOEXEC;
 745        current->mm->def_flags = def_flags;
 746
 747        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 748           may depend on the personality.  */
 749        SET_PERSONALITY(loc->elf_ex);
 750        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 751                current->personality |= READ_IMPLIES_EXEC;
 752
 753        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 754                current->flags |= PF_RANDOMIZE;
 755        arch_pick_mmap_layout(current->mm);
 756
 757        /* Do this so that we can load the interpreter, if need be.  We will
 758           change some of these later */
 759        current->mm->free_area_cache = current->mm->mmap_base;
 760        current->mm->cached_hole_size = 0;
 761        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 762                                 executable_stack);
 763        if (retval < 0) {
 764                send_sig(SIGKILL, current, 0);
 765                goto out_free_dentry;
 766        }
 767        
 768        current->mm->start_stack = bprm->p;
 769
 770        /* Now we do a little grungy work by mmaping the ELF image into
 771           the correct location in memory. */
 772        for(i = 0, elf_ppnt = elf_phdata;
 773            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 774                int elf_prot = 0, elf_flags;
 775                unsigned long k, vaddr;
 776
 777                if (elf_ppnt->p_type != PT_LOAD)
 778                        continue;
 779
 780                if (unlikely (elf_brk > elf_bss)) {
 781                        unsigned long nbyte;
 782                    
 783                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 784                           before this one. Map anonymous pages, if needed,
 785                           and clear the area.  */
 786                        retval = set_brk (elf_bss + load_bias,
 787                                          elf_brk + load_bias);
 788                        if (retval) {
 789                                send_sig(SIGKILL, current, 0);
 790                                goto out_free_dentry;
 791                        }
 792                        nbyte = ELF_PAGEOFFSET(elf_bss);
 793                        if (nbyte) {
 794                                nbyte = ELF_MIN_ALIGN - nbyte;
 795                                if (nbyte > elf_brk - elf_bss)
 796                                        nbyte = elf_brk - elf_bss;
 797                                if (clear_user((void __user *)elf_bss +
 798                                                        load_bias, nbyte)) {
 799                                        /*
 800                                         * This bss-zeroing can fail if the ELF
 801                                         * file specifies odd protections. So
 802                                         * we don't check the return value
 803                                         */
 804                                }
 805                        }
 806                }
 807
 808                if (elf_ppnt->p_flags & PF_R)
 809                        elf_prot |= PROT_READ;
 810                if (elf_ppnt->p_flags & PF_W)
 811                        elf_prot |= PROT_WRITE;
 812                if (elf_ppnt->p_flags & PF_X)
 813                        elf_prot |= PROT_EXEC;
 814
 815                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 816
 817                vaddr = elf_ppnt->p_vaddr;
 818                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 819                        elf_flags |= MAP_FIXED;
 820                } else if (loc->elf_ex.e_type == ET_DYN) {
 821                        /* Try and get dynamic programs out of the way of the
 822                         * default mmap base, as well as whatever program they
 823                         * might try to exec.  This is because the brk will
 824                         * follow the loader, and is not movable.  */
 825#ifdef CONFIG_X86
 826                        load_bias = 0;
 827#else
 828                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 829#endif
 830                }
 831
 832                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 833                                elf_prot, elf_flags, 0);
 834                if (BAD_ADDR(error)) {
 835                        send_sig(SIGKILL, current, 0);
 836                        retval = IS_ERR((void *)error) ?
 837                                PTR_ERR((void*)error) : -EINVAL;
 838                        goto out_free_dentry;
 839                }
 840
 841                if (!load_addr_set) {
 842                        load_addr_set = 1;
 843                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 844                        if (loc->elf_ex.e_type == ET_DYN) {
 845                                load_bias += error -
 846                                             ELF_PAGESTART(load_bias + vaddr);
 847                                load_addr += load_bias;
 848                                reloc_func_desc = load_bias;
 849                        }
 850                }
 851                k = elf_ppnt->p_vaddr;
 852                if (k < start_code)
 853                        start_code = k;
 854                if (start_data < k)
 855                        start_data = k;
 856
 857                /*
 858                 * Check to see if the section's size will overflow the
 859                 * allowed task size. Note that p_filesz must always be
 860                 * <= p_memsz so it is only necessary to check p_memsz.
 861                 */
 862                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 863                    elf_ppnt->p_memsz > TASK_SIZE ||
 864                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 865                        /* set_brk can never work. Avoid overflows. */
 866                        send_sig(SIGKILL, current, 0);
 867                        retval = -EINVAL;
 868                        goto out_free_dentry;
 869                }
 870
 871                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 872
 873                if (k > elf_bss)
 874                        elf_bss = k;
 875                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 876                        end_code = k;
 877                if (end_data < k)
 878                        end_data = k;
 879                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 880                if (k > elf_brk)
 881                        elf_brk = k;
 882        }
 883
 884        loc->elf_ex.e_entry += load_bias;
 885        elf_bss += load_bias;
 886        elf_brk += load_bias;
 887        start_code += load_bias;
 888        end_code += load_bias;
 889        start_data += load_bias;
 890        end_data += load_bias;
 891
 892        /* Calling set_brk effectively mmaps the pages that we need
 893         * for the bss and break sections.  We must do this before
 894         * mapping in the interpreter, to make sure it doesn't wind
 895         * up getting placed where the bss needs to go.
 896         */
 897        retval = set_brk(elf_bss, elf_brk);
 898        if (retval) {
 899                send_sig(SIGKILL, current, 0);
 900                goto out_free_dentry;
 901        }
 902        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 903                send_sig(SIGSEGV, current, 0);
 904                retval = -EFAULT; /* Nobody gets to see this, but.. */
 905                goto out_free_dentry;
 906        }
 907
 908        if (elf_interpreter) {
 909                unsigned long uninitialized_var(interp_map_addr);
 910
 911                elf_entry = load_elf_interp(&loc->interp_elf_ex,
 912                                            interpreter,
 913                                            &interp_map_addr,
 914                                            load_bias);
 915                if (!IS_ERR((void *)elf_entry)) {
 916                        /*
 917                         * load_elf_interp() returns relocation
 918                         * adjustment
 919                         */
 920                        interp_load_addr = elf_entry;
 921                        elf_entry += loc->interp_elf_ex.e_entry;
 922                }
 923                if (BAD_ADDR(elf_entry)) {
 924                        force_sig(SIGSEGV, current);
 925                        retval = IS_ERR((void *)elf_entry) ?
 926                                        (int)elf_entry : -EINVAL;
 927                        goto out_free_dentry;
 928                }
 929                reloc_func_desc = interp_load_addr;
 930
 931                allow_write_access(interpreter);
 932                fput(interpreter);
 933                kfree(elf_interpreter);
 934        } else {
 935                elf_entry = loc->elf_ex.e_entry;
 936                if (BAD_ADDR(elf_entry)) {
 937                        force_sig(SIGSEGV, current);
 938                        retval = -EINVAL;
 939                        goto out_free_dentry;
 940                }
 941        }
 942
 943        kfree(elf_phdata);
 944
 945        set_binfmt(&elf_format);
 946
 947#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 948        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
 949        if (retval < 0) {
 950                send_sig(SIGKILL, current, 0);
 951                goto out;
 952        }
 953#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 954
 955        install_exec_creds(bprm);
 956        current->flags &= ~PF_FORKNOEXEC;
 957        retval = create_elf_tables(bprm, &loc->elf_ex,
 958                          load_addr, interp_load_addr);
 959        if (retval < 0) {
 960                send_sig(SIGKILL, current, 0);
 961                goto out;
 962        }
 963        /* N.B. passed_fileno might not be initialized? */
 964        current->mm->end_code = end_code;
 965        current->mm->start_code = start_code;
 966        current->mm->start_data = start_data;
 967        current->mm->end_data = end_data;
 968        current->mm->start_stack = bprm->p;
 969
 970#ifdef arch_randomize_brk
 971        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
 972                current->mm->brk = current->mm->start_brk =
 973                        arch_randomize_brk(current->mm);
 974#endif
 975
 976        if (current->personality & MMAP_PAGE_ZERO) {
 977                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 978                   and some applications "depend" upon this behavior.
 979                   Since we do not have the power to recompile these, we
 980                   emulate the SVr4 behavior. Sigh. */
 981                down_write(&current->mm->mmap_sem);
 982                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 983                                MAP_FIXED | MAP_PRIVATE, 0);
 984                up_write(&current->mm->mmap_sem);
 985        }
 986
 987#ifdef ELF_PLAT_INIT
 988        /*
 989         * The ABI may specify that certain registers be set up in special
 990         * ways (on i386 %edx is the address of a DT_FINI function, for
 991         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
 992         * that the e_entry field is the address of the function descriptor
 993         * for the startup routine, rather than the address of the startup
 994         * routine itself.  This macro performs whatever initialization to
 995         * the regs structure is required as well as any relocations to the
 996         * function descriptor entries when executing dynamically links apps.
 997         */
 998        ELF_PLAT_INIT(regs, reloc_func_desc);
 999#endif
1000
1001        start_thread(regs, elf_entry, bprm->p);
1002        retval = 0;
1003out:
1004        kfree(loc);
1005out_ret:
1006        return retval;
1007
1008        /* error cleanup */
1009out_free_dentry:
1010        allow_write_access(interpreter);
1011        if (interpreter)
1012                fput(interpreter);
1013out_free_interp:
1014        kfree(elf_interpreter);
1015out_free_ph:
1016        kfree(elf_phdata);
1017        goto out;
1018}
1019
1020/* This is really simpleminded and specialized - we are loading an
1021   a.out library that is given an ELF header. */
1022static int load_elf_library(struct file *file)
1023{
1024        struct elf_phdr *elf_phdata;
1025        struct elf_phdr *eppnt;
1026        unsigned long elf_bss, bss, len;
1027        int retval, error, i, j;
1028        struct elfhdr elf_ex;
1029
1030        error = -ENOEXEC;
1031        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1032        if (retval != sizeof(elf_ex))
1033                goto out;
1034
1035        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1036                goto out;
1037
1038        /* First of all, some simple consistency checks */
1039        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1040            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1041                goto out;
1042
1043        /* Now read in all of the header information */
1044
1045        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1046        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1047
1048        error = -ENOMEM;
1049        elf_phdata = kmalloc(j, GFP_KERNEL);
1050        if (!elf_phdata)
1051                goto out;
1052
1053        eppnt = elf_phdata;
1054        error = -ENOEXEC;
1055        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1056        if (retval != j)
1057                goto out_free_ph;
1058
1059        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1060                if ((eppnt + i)->p_type == PT_LOAD)
1061                        j++;
1062        if (j != 1)
1063                goto out_free_ph;
1064
1065        while (eppnt->p_type != PT_LOAD)
1066                eppnt++;
1067
1068        /* Now use mmap to map the library into memory. */
1069        down_write(&current->mm->mmap_sem);
1070        error = do_mmap(file,
1071                        ELF_PAGESTART(eppnt->p_vaddr),
1072                        (eppnt->p_filesz +
1073                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                        PROT_READ | PROT_WRITE | PROT_EXEC,
1075                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                        (eppnt->p_offset -
1077                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078        up_write(&current->mm->mmap_sem);
1079        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1080                goto out_free_ph;
1081
1082        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1083        if (padzero(elf_bss)) {
1084                error = -EFAULT;
1085                goto out_free_ph;
1086        }
1087
1088        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1089                            ELF_MIN_ALIGN - 1);
1090        bss = eppnt->p_memsz + eppnt->p_vaddr;
1091        if (bss > len) {
1092                down_write(&current->mm->mmap_sem);
1093                do_brk(len, bss - len);
1094                up_write(&current->mm->mmap_sem);
1095        }
1096        error = 0;
1097
1098out_free_ph:
1099        kfree(elf_phdata);
1100out:
1101        return error;
1102}
1103
1104/*
1105 * Note that some platforms still use traditional core dumps and not
1106 * the ELF core dump.  Each platform can select it as appropriate.
1107 */
1108#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110/*
1111 * ELF core dumper
1112 *
1113 * Modelled on fs/exec.c:aout_core_dump()
1114 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1115 */
1116/*
1117 * These are the only things you should do on a core-file: use only these
1118 * functions to write out all the necessary info.
1119 */
1120static int dump_write(struct file *file, const void *addr, int nr)
1121{
1122        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1123}
1124
1125static int dump_seek(struct file *file, loff_t off)
1126{
1127        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1128                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1129                        return 0;
1130        } else {
1131                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1132                if (!buf)
1133                        return 0;
1134                while (off > 0) {
1135                        unsigned long n = off;
1136                        if (n > PAGE_SIZE)
1137                                n = PAGE_SIZE;
1138                        if (!dump_write(file, buf, n))
1139                                return 0;
1140                        off -= n;
1141                }
1142                free_page((unsigned long)buf);
1143        }
1144        return 1;
1145}
1146
1147/*
1148 * Decide what to dump of a segment, part, all or none.
1149 */
1150static unsigned long vma_dump_size(struct vm_area_struct *vma,
1151                                   unsigned long mm_flags)
1152{
1153#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1154
1155        /* The vma can be set up to tell us the answer directly.  */
1156        if (vma->vm_flags & VM_ALWAYSDUMP)
1157                goto whole;
1158
1159        /* Hugetlb memory check */
1160        if (vma->vm_flags & VM_HUGETLB) {
1161                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1162                        goto whole;
1163                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1164                        goto whole;
1165        }
1166
1167        /* Do not dump I/O mapped devices or special mappings */
1168        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1169                return 0;
1170
1171        /* By default, dump shared memory if mapped from an anonymous file. */
1172        if (vma->vm_flags & VM_SHARED) {
1173                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1174                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1175                        goto whole;
1176                return 0;
1177        }
1178
1179        /* Dump segments that have been written to.  */
1180        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1181                goto whole;
1182        if (vma->vm_file == NULL)
1183                return 0;
1184
1185        if (FILTER(MAPPED_PRIVATE))
1186                goto whole;
1187
1188        /*
1189         * If this looks like the beginning of a DSO or executable mapping,
1190         * check for an ELF header.  If we find one, dump the first page to
1191         * aid in determining what was mapped here.
1192         */
1193        if (FILTER(ELF_HEADERS) &&
1194            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1195                u32 __user *header = (u32 __user *) vma->vm_start;
1196                u32 word;
1197                mm_segment_t fs = get_fs();
1198                /*
1199                 * Doing it this way gets the constant folded by GCC.
1200                 */
1201                union {
1202                        u32 cmp;
1203                        char elfmag[SELFMAG];
1204                } magic;
1205                BUILD_BUG_ON(SELFMAG != sizeof word);
1206                magic.elfmag[EI_MAG0] = ELFMAG0;
1207                magic.elfmag[EI_MAG1] = ELFMAG1;
1208                magic.elfmag[EI_MAG2] = ELFMAG2;
1209                magic.elfmag[EI_MAG3] = ELFMAG3;
1210                /*
1211                 * Switch to the user "segment" for get_user(),
1212                 * then put back what elf_core_dump() had in place.
1213                 */
1214                set_fs(USER_DS);
1215                if (unlikely(get_user(word, header)))
1216                        word = 0;
1217                set_fs(fs);
1218                if (word == magic.cmp)
1219                        return PAGE_SIZE;
1220        }
1221
1222#undef  FILTER
1223
1224        return 0;
1225
1226whole:
1227        return vma->vm_end - vma->vm_start;
1228}
1229
/*
 * In-memory description of one ELF note, before it is written out.
 */
struct memelfnote {
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type, stored as n_type */
        unsigned int datasz;    /* size in bytes of the buffer at @data */
        void *data;             /* note payload */
};
1238
1239static int notesize(struct memelfnote *en)
1240{
1241        int sz;
1242
1243        sz = sizeof(struct elf_note);
1244        sz += roundup(strlen(en->name) + 1, 4);
1245        sz += roundup(en->datasz, 4);
1246
1247        return sz;
1248}
1249
/*
 * Write @nr bytes from @addr to the core file and add @nr to *@foffset.
 * Expects a variable named `file` in the enclosing function and makes that
 * function return 0 if the write fails.  @nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)                           \
        do {                                                    \
                if (!dump_write(file, (addr), (nr)))            \
                        return 0;                               \
                *foffset += (nr);                               \
        } while(0)
1252
1253static int alignfile(struct file *file, loff_t *foffset)
1254{
1255        static const char buf[4] = { 0, };
1256        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1257        return 1;
1258}
1259
1260static int writenote(struct memelfnote *men, struct file *file,
1261                        loff_t *foffset)
1262{
1263        struct elf_note en;
1264        en.n_namesz = strlen(men->name) + 1;
1265        en.n_descsz = men->datasz;
1266        en.n_type = men->type;
1267
1268        DUMP_WRITE(&en, sizeof(en), foffset);
1269        DUMP_WRITE(men->name, en.n_namesz, foffset);
1270        if (!alignfile(file, foffset))
1271                return 0;
1272        DUMP_WRITE(men->data, men->datasz, foffset);
1273        if (!alignfile(file, foffset))
1274                return 0;
1275
1276        return 1;
1277}
1278#undef DUMP_WRITE
1279
/*
 * Core-dump flavours of the write/seek helpers.  Both expand to a bare
 * `if (...) goto end_coredump;` statement and therefore rely on names from
 * the enclosing function: `file`, an `end_coredump` label and, for
 * DUMP_WRITE(), the running total `size` and the cap `limit`.
 *
 * DUMP_WRITE() adds @nr to `size` first and bails out without writing once
 * the total exceeds `limit`.  @nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr)                                    \
        if ((size += (nr)) > limit ||                           \
            !dump_write(file, (addr), (nr)))                    \
                goto end_coredump;
#define DUMP_SEEK(off)                                          \
        if (!dump_seek(file, (off)))                            \
                goto end_coredump;
1286
1287static void fill_elf_header(struct elfhdr *elf, int segs,
1288                            u16 machine, u32 flags, u8 osabi)
1289{
1290        memset(elf, 0, sizeof(*elf));
1291
1292        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1293        elf->e_ident[EI_CLASS] = ELF_CLASS;
1294        elf->e_ident[EI_DATA] = ELF_DATA;
1295        elf->e_ident[EI_VERSION] = EV_CURRENT;
1296        elf->e_ident[EI_OSABI] = ELF_OSABI;
1297
1298        elf->e_type = ET_CORE;
1299        elf->e_machine = machine;
1300        elf->e_version = EV_CURRENT;
1301        elf->e_phoff = sizeof(struct elfhdr);
1302        elf->e_flags = flags;
1303        elf->e_ehsize = sizeof(struct elfhdr);
1304        elf->e_phentsize = sizeof(struct elf_phdr);
1305        elf->e_phnum = segs;
1306
1307        return;
1308}
1309
1310static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1311{
1312        phdr->p_type = PT_NOTE;
1313        phdr->p_offset = offset;
1314        phdr->p_vaddr = 0;
1315        phdr->p_paddr = 0;
1316        phdr->p_filesz = sz;
1317        phdr->p_memsz = 0;
1318        phdr->p_flags = 0;
1319        phdr->p_align = 0;
1320        return;
1321}
1322
1323static void fill_note(struct memelfnote *note, const char *name, int type, 
1324                unsigned int sz, void *data)
1325{
1326        note->name = name;
1327        note->type = type;
1328        note->datasz = sz;
1329        note->data = data;
1330        return;
1331}
1332
1333/*
1334 * fill up all the fields in prstatus from the given task struct, except
1335 * registers which need to be filled up separately.
1336 */
1337static void fill_prstatus(struct elf_prstatus *prstatus,
1338                struct task_struct *p, long signr)
1339{
1340        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1341        prstatus->pr_sigpend = p->pending.signal.sig[0];
1342        prstatus->pr_sighold = p->blocked.sig[0];
1343        prstatus->pr_pid = task_pid_vnr(p);
1344        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1345        prstatus->pr_pgrp = task_pgrp_vnr(p);
1346        prstatus->pr_sid = task_session_vnr(p);
1347        if (thread_group_leader(p)) {
1348                struct task_cputime cputime;
1349
1350                /*
1351                 * This is the record for the group leader.  It shows the
1352                 * group-wide total, not its individual thread total.
1353                 */
1354                thread_group_cputime(p, &cputime);
1355                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1356                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1357        } else {
1358                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1359                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1360        }
1361        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1362        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1363}
1364
1365static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1366                       struct mm_struct *mm)
1367{
1368        const struct cred *cred;
1369        unsigned int i, len;
1370        
1371        /* first copy the parameters from user space */
1372        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1373
1374        len = mm->arg_end - mm->arg_start;
1375        if (len >= ELF_PRARGSZ)
1376                len = ELF_PRARGSZ-1;
1377        if (copy_from_user(&psinfo->pr_psargs,
1378                           (const char __user *)mm->arg_start, len))
1379                return -EFAULT;
1380        for(i = 0; i < len; i++)
1381                if (psinfo->pr_psargs[i] == 0)
1382                        psinfo->pr_psargs[i] = ' ';
1383        psinfo->pr_psargs[len] = 0;
1384
1385        psinfo->pr_pid = task_pid_vnr(p);
1386        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1387        psinfo->pr_pgrp = task_pgrp_vnr(p);
1388        psinfo->pr_sid = task_session_vnr(p);
1389
1390        i = p->state ? ffz(~p->state) + 1 : 0;
1391        psinfo->pr_state = i;
1392        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1393        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1394        psinfo->pr_nice = task_nice(p);
1395        psinfo->pr_flag = p->flags;
1396        rcu_read_lock();
1397        cred = __task_cred(p);
1398        SET_UID(psinfo->pr_uid, cred->uid);
1399        SET_GID(psinfo->pr_gid, cred->gid);
1400        rcu_read_unlock();
1401        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1402        
1403        return 0;
1404}
1405
1406static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1407{
1408        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1409        int i = 0;
1410        do
1411                i += 2;
1412        while (auxv[i - 2] != AT_NULL);
1413        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1414}
1415
1416#ifdef CORE_DUMP_USE_REGSET
1417#include <linux/regset.h>
1418
1419struct elf_thread_core_info {
1420        struct elf_thread_core_info *next;
1421        struct task_struct *task;
1422        struct elf_prstatus prstatus;
1423        struct memelfnote notes[0];
1424};
1425
1426struct elf_note_info {
1427        struct elf_thread_core_info *thread;
1428        struct memelfnote psinfo;
1429        struct memelfnote auxv;
1430        size_t size;
1431        int thread_notes;
1432};
1433
1434/*
1435 * When a regset has a writeback hook, we call it on each thread before
1436 * dumping user memory.  On register window machines, this makes sure the
1437 * user memory backing the register data is up to date before we read it.
1438 */
1439static void do_thread_regset_writeback(struct task_struct *task,
1440                                       const struct user_regset *regset)
1441{
1442        if (regset->writeback)
1443                regset->writeback(task, regset, 1);
1444}
1445
1446static int fill_thread_core_info(struct elf_thread_core_info *t,
1447                                 const struct user_regset_view *view,
1448                                 long signr, size_t *total)
1449{
1450        unsigned int i;
1451
1452        /*
1453         * NT_PRSTATUS is the one special case, because the regset data
1454         * goes into the pr_reg field inside the note contents, rather
1455         * than being the whole note contents.  We fill the reset in here.
1456         * We assume that regset 0 is NT_PRSTATUS.
1457         */
1458        fill_prstatus(&t->prstatus, t->task, signr);
1459        (void) view->regsets[0].get(t->task, &view->regsets[0],
1460                                    0, sizeof(t->prstatus.pr_reg),
1461                                    &t->prstatus.pr_reg, NULL);
1462
1463        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1464                  sizeof(t->prstatus), &t->prstatus);
1465        *total += notesize(&t->notes[0]);
1466
1467        do_thread_regset_writeback(t->task, &view->regsets[0]);
1468
1469        /*
1470         * Each other regset might generate a note too.  For each regset
1471         * that has no core_note_type or is inactive, we leave t->notes[i]
1472         * all zero and we'll know to skip writing it later.
1473         */
1474        for (i = 1; i < view->n; ++i) {
1475                const struct user_regset *regset = &view->regsets[i];
1476                do_thread_regset_writeback(t->task, regset);
1477                if (regset->core_note_type &&
1478                    (!regset->active || regset->active(t->task, regset))) {
1479                        int ret;
1480                        size_t size = regset->n * regset->size;
1481                        void *data = kmalloc(size, GFP_KERNEL);
1482                        if (unlikely(!data))
1483                                return 0;
1484                        ret = regset->get(t->task, regset,
1485                                          0, size, data, NULL);
1486                        if (unlikely(ret))
1487                                kfree(data);
1488                        else {
1489                                if (regset->core_note_type != NT_PRFPREG)
1490                                        fill_note(&t->notes[i], "LINUX",
1491                                                  regset->core_note_type,
1492                                                  size, data);
1493                                else {
1494                                        t->prstatus.pr_fpvalid = 1;
1495                                        fill_note(&t->notes[i], "CORE",
1496                                                  NT_PRFPREG, size, data);
1497                                }
1498                                *total += notesize(&t->notes[i]);
1499                        }
1500                }
1501        }
1502
1503        return 1;
1504}
1505
1506static int fill_note_info(struct elfhdr *elf, int phdrs,
1507                          struct elf_note_info *info,
1508                          long signr, struct pt_regs *regs)
1509{
1510        struct task_struct *dump_task = current;
1511        const struct user_regset_view *view = task_user_regset_view(dump_task);
1512        struct elf_thread_core_info *t;
1513        struct elf_prpsinfo *psinfo;
1514        struct core_thread *ct;
1515        unsigned int i;
1516
1517        info->size = 0;
1518        info->thread = NULL;
1519
1520        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1521        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1522
1523        if (psinfo == NULL)
1524                return 0;
1525
1526        /*
1527         * Figure out how many notes we're going to need for each thread.
1528         */
1529        info->thread_notes = 0;
1530        for (i = 0; i < view->n; ++i)
1531                if (view->regsets[i].core_note_type != 0)
1532                        ++info->thread_notes;
1533
1534        /*
1535         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1536         * since it is our one special case.
1537         */
1538        if (unlikely(info->thread_notes == 0) ||
1539            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1540                WARN_ON(1);
1541                return 0;
1542        }
1543
1544        /*
1545         * Initialize the ELF file header.
1546         */
1547        fill_elf_header(elf, phdrs,
1548                        view->e_machine, view->e_flags, view->ei_osabi);
1549
1550        /*
1551         * Allocate a structure for each thread.
1552         */
1553        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1554                t = kzalloc(offsetof(struct elf_thread_core_info,
1555                                     notes[info->thread_notes]),
1556                            GFP_KERNEL);
1557                if (unlikely(!t))
1558                        return 0;
1559
1560                t->task = ct->task;
1561                if (ct->task == dump_task || !info->thread) {
1562                        t->next = info->thread;
1563                        info->thread = t;
1564                } else {
1565                        /*
1566                         * Make sure to keep the original task at
1567                         * the head of the list.
1568                         */
1569                        t->next = info->thread->next;
1570                        info->thread->next = t;
1571                }
1572        }
1573
1574        /*
1575         * Now fill in each thread's information.
1576         */
1577        for (t = info->thread; t != NULL; t = t->next)
1578                if (!fill_thread_core_info(t, view, signr, &info->size))
1579                        return 0;
1580
1581        /*
1582         * Fill in the two process-wide notes.
1583         */
1584        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1585        info->size += notesize(&info->psinfo);
1586
1587        fill_auxv_note(&info->auxv, current->mm);
1588        info->size += notesize(&info->auxv);
1589
1590        return 1;
1591}
1592
1593static size_t get_note_info_size(struct elf_note_info *info)
1594{
1595        return info->size;
1596}
1597
1598/*
1599 * Write all the notes for each thread.  When writing the first thread, the
1600 * process-wide notes are interleaved after the first thread-specific note.
1601 */
1602static int write_note_info(struct elf_note_info *info,
1603                           struct file *file, loff_t *foffset)
1604{
1605        bool first = 1;
1606        struct elf_thread_core_info *t = info->thread;
1607
1608        do {
1609                int i;
1610
1611                if (!writenote(&t->notes[0], file, foffset))
1612                        return 0;
1613
1614                if (first && !writenote(&info->psinfo, file, foffset))
1615                        return 0;
1616                if (first && !writenote(&info->auxv, file, foffset))
1617                        return 0;
1618
1619                for (i = 1; i < info->thread_notes; ++i)
1620                        if (t->notes[i].data &&
1621                            !writenote(&t->notes[i], file, foffset))
1622                                return 0;
1623
1624                first = 0;
1625                t = t->next;
1626        } while (t);
1627
1628        return 1;
1629}
1630
1631static void free_note_info(struct elf_note_info *info)
1632{
1633        struct elf_thread_core_info *threads = info->thread;
1634        while (threads) {
1635                unsigned int i;
1636                struct elf_thread_core_info *t = threads;
1637                threads = t->next;
1638                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1639                for (i = 1; i < info->thread_notes; ++i)
1640                        kfree(t->notes[i].data);
1641                kfree(t);
1642        }
1643        kfree(info->psinfo.data);
1644}
1645
1646#else
1647
1648/* Here is the structure in which status of each thread is captured. */
1649struct elf_thread_status
1650{
1651        struct list_head list;
1652        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1653        elf_fpregset_t fpu;             /* NT_PRFPREG */
1654        struct task_struct *thread;
1655#ifdef ELF_CORE_COPY_XFPREGS
1656        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1657#endif
1658        struct memelfnote notes[3];
1659        int num_notes;
1660};
1661
1662/*
1663 * In order to add the specific thread information for the elf file format,
1664 * we need to keep a linked list of every threads pr_status and then create
1665 * a single section for them in the final core file.
1666 */
1667static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1668{
1669        int sz = 0;
1670        struct task_struct *p = t->thread;
1671        t->num_notes = 0;
1672
1673        fill_prstatus(&t->prstatus, p, signr);
1674        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1675        
1676        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1677                  &(t->prstatus));
1678        t->num_notes++;
1679        sz += notesize(&t->notes[0]);
1680
1681        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1682                                                                &t->fpu))) {
1683                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1684                          &(t->fpu));
1685                t->num_notes++;
1686                sz += notesize(&t->notes[1]);
1687        }
1688
1689#ifdef ELF_CORE_COPY_XFPREGS
1690        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1691                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1692                          sizeof(t->xfpu), &t->xfpu);
1693                t->num_notes++;
1694                sz += notesize(&t->notes[2]);
1695        }
1696#endif  
1697        return sz;
1698}
1699
1700struct elf_note_info {
1701        struct memelfnote *notes;
1702        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1703        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1704        struct list_head thread_list;
1705        elf_fpregset_t *fpu;
1706#ifdef ELF_CORE_COPY_XFPREGS
1707        elf_fpxregset_t *xfpu;
1708#endif
1709        int thread_status_size;
1710        int numnote;
1711};
1712
1713static int fill_note_info(struct elfhdr *elf, int phdrs,
1714                          struct elf_note_info *info,
1715                          long signr, struct pt_regs *regs)
1716{
1717#define NUM_NOTES       6
1718        struct list_head *t;
1719
1720        info->notes = NULL;
1721        info->prstatus = NULL;
1722        info->psinfo = NULL;
1723        info->fpu = NULL;
1724#ifdef ELF_CORE_COPY_XFPREGS
1725        info->xfpu = NULL;
1726#endif
1727        INIT_LIST_HEAD(&info->thread_list);
1728
1729        info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1730                              GFP_KERNEL);
1731        if (!info->notes)
1732                return 0;
1733        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1734        if (!info->psinfo)
1735                return 0;
1736        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1737        if (!info->prstatus)
1738                return 0;
1739        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1740        if (!info->fpu)
1741                return 0;
1742#ifdef ELF_CORE_COPY_XFPREGS
1743        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1744        if (!info->xfpu)
1745                return 0;
1746#endif
1747
1748        info->thread_status_size = 0;
1749        if (signr) {
1750                struct core_thread *ct;
1751                struct elf_thread_status *ets;
1752
1753                for (ct = current->mm->core_state->dumper.next;
1754                                                ct; ct = ct->next) {
1755                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1756                        if (!ets)
1757                                return 0;
1758
1759                        ets->thread = ct->task;
1760                        list_add(&ets->list, &info->thread_list);
1761                }
1762
1763                list_for_each(t, &info->thread_list) {
1764                        int sz;
1765
1766                        ets = list_entry(t, struct elf_thread_status, list);
1767                        sz = elf_dump_thread_status(signr, ets);
1768                        info->thread_status_size += sz;
1769                }
1770        }
1771        /* now collect the dump for the current */
1772        memset(info->prstatus, 0, sizeof(*info->prstatus));
1773        fill_prstatus(info->prstatus, current, signr);
1774        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1775
1776        /* Set up header */
1777        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1778
1779        /*
1780         * Set up the notes in similar form to SVR4 core dumps made
1781         * with info from their /proc.
1782         */
1783
1784        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1785                  sizeof(*info->prstatus), info->prstatus);
1786        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1787        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1788                  sizeof(*info->psinfo), info->psinfo);
1789
1790        info->numnote = 2;
1791
1792        fill_auxv_note(&info->notes[info->numnote++], current->mm);
1793
1794        /* Try to dump the FPU. */
1795        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1796                                                               info->fpu);
1797        if (info->prstatus->pr_fpvalid)
1798                fill_note(info->notes + info->numnote++,
1799                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1800#ifdef ELF_CORE_COPY_XFPREGS
1801        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1802                fill_note(info->notes + info->numnote++,
1803                          "LINUX", ELF_CORE_XFPREG_TYPE,
1804                          sizeof(*info->xfpu), info->xfpu);
1805#endif
1806
1807        return 1;
1808
1809#undef NUM_NOTES
1810}
1811
1812static size_t get_note_info_size(struct elf_note_info *info)
1813{
1814        int sz = 0;
1815        int i;
1816
1817        for (i = 0; i < info->numnote; i++)
1818                sz += notesize(info->notes + i);
1819
1820        sz += info->thread_status_size;
1821
1822        return sz;
1823}
1824
1825static int write_note_info(struct elf_note_info *info,
1826                           struct file *file, loff_t *foffset)
1827{
1828        int i;
1829        struct list_head *t;
1830
1831        for (i = 0; i < info->numnote; i++)
1832                if (!writenote(info->notes + i, file, foffset))
1833                        return 0;
1834
1835        /* write out the thread status notes section */
1836        list_for_each(t, &info->thread_list) {
1837                struct elf_thread_status *tmp =
1838                                list_entry(t, struct elf_thread_status, list);
1839
1840                for (i = 0; i < tmp->num_notes; i++)
1841                        if (!writenote(&tmp->notes[i], file, foffset))
1842                                return 0;
1843        }
1844
1845        return 1;
1846}
1847
1848static void free_note_info(struct elf_note_info *info)
1849{
1850        while (!list_empty(&info->thread_list)) {
1851                struct list_head *tmp = info->thread_list.next;
1852                list_del(tmp);
1853                kfree(list_entry(tmp, struct elf_thread_status, list));
1854        }
1855
1856        kfree(info->prstatus);
1857        kfree(info->psinfo);
1858        kfree(info->notes);
1859        kfree(info->fpu);
1860#ifdef ELF_CORE_COPY_XFPREGS
1861        kfree(info->xfpu);
1862#endif
1863}
1864
1865#endif
1866
1867static struct vm_area_struct *first_vma(struct task_struct *tsk,
1868                                        struct vm_area_struct *gate_vma)
1869{
1870        struct vm_area_struct *ret = tsk->mm->mmap;
1871
1872        if (ret)
1873                return ret;
1874        return gate_vma;
1875}
1876/*
1877 * Helper function for iterating across a vma list.  It ensures that the caller
1878 * will visit `gate_vma' prior to terminating the search.
1879 */
1880static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1881                                        struct vm_area_struct *gate_vma)
1882{
1883        struct vm_area_struct *ret;
1884
1885        ret = this_vma->vm_next;
1886        if (ret)
1887                return ret;
1888        if (this_vma == gate_vma)
1889                return NULL;
1890        return gate_vma;
1891}
1892
1893/*
1894 * Actual dumper
1895 *
1896 * This is a two-pass process; first we find the offsets of the bits,
1897 * and then they are actually written out.  If we run out of core limit
1898 * we just truncate.
1899 */
1900static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1901{
1902        int has_dumped = 0;
1903        mm_segment_t fs;
1904        int segs;
1905        size_t size = 0;
1906        struct vm_area_struct *vma, *gate_vma;
1907        struct elfhdr *elf = NULL;
1908        loff_t offset = 0, dataoff, foffset;
1909        unsigned long mm_flags;
1910        struct elf_note_info info;
1911
1912        /*
1913         * We no longer stop all VM operations.
1914         * 
1915         * This is because those proceses that could possibly change map_count
1916         * or the mmap / vma pages are now blocked in do_exit on current
1917         * finishing this core dump.
1918         *
1919         * Only ptrace can touch these memory addresses, but it doesn't change
1920         * the map_count or the pages allocated. So no possibility of crashing
1921         * exists while dumping the mm->vm_next areas to the core file.
1922         */
1923  
1924        /* alloc memory for large data structures: too large to be on stack */
1925        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1926        if (!elf)
1927                goto out;
1928        
1929        segs = current->mm->map_count;
1930#ifdef ELF_CORE_EXTRA_PHDRS
1931        segs += ELF_CORE_EXTRA_PHDRS;
1932#endif
1933
1934        gate_vma = get_gate_vma(current);
1935        if (gate_vma != NULL)
1936                segs++;
1937
1938        /*
1939         * Collect all the non-memory information about the process for the
1940         * notes.  This also sets up the file header.
1941         */
1942        if (!fill_note_info(elf, segs + 1, /* including notes section */
1943                            &info, signr, regs))
1944                goto cleanup;
1945
1946        has_dumped = 1;
1947        current->flags |= PF_DUMPCORE;
1948  
1949        fs = get_fs();
1950        set_fs(KERNEL_DS);
1951
1952        DUMP_WRITE(elf, sizeof(*elf));
1953        offset += sizeof(*elf);                         /* Elf header */
1954        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1955        foffset = offset;
1956
1957        /* Write notes phdr entry */
1958        {
1959                struct elf_phdr phdr;
1960                size_t sz = get_note_info_size(&info);
1961
1962                sz += elf_coredump_extra_notes_size();
1963
1964                fill_elf_note_phdr(&phdr, sz, offset);
1965                offset += sz;
1966                DUMP_WRITE(&phdr, sizeof(phdr));
1967        }
1968
1969        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1970
1971        /*
1972         * We must use the same mm->flags while dumping core to avoid
1973         * inconsistency between the program headers and bodies, otherwise an
1974         * unusable core file can be generated.
1975         */
1976        mm_flags = current->mm->flags;
1977
1978        /* Write program headers for segments dump */
1979        for (vma = first_vma(current, gate_vma); vma != NULL;
1980                        vma = next_vma(vma, gate_vma)) {
1981                struct elf_phdr phdr;
1982
1983                phdr.p_type = PT_LOAD;
1984                phdr.p_offset = offset;
1985                phdr.p_vaddr = vma->vm_start;
1986                phdr.p_paddr = 0;
1987                phdr.p_filesz = vma_dump_size(vma, mm_flags);
1988                phdr.p_memsz = vma->vm_end - vma->vm_start;
1989                offset += phdr.p_filesz;
1990                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1991                if (vma->vm_flags & VM_WRITE)
1992                        phdr.p_flags |= PF_W;
1993                if (vma->vm_flags & VM_EXEC)
1994                        phdr.p_flags |= PF_X;
1995                phdr.p_align = ELF_EXEC_PAGESIZE;
1996
1997                DUMP_WRITE(&phdr, sizeof(phdr));
1998        }
1999
2000#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2001        ELF_CORE_WRITE_EXTRA_PHDRS;
2002#endif
2003
2004        /* write out the notes section */
2005        if (!write_note_info(&info, file, &foffset))
2006                goto end_coredump;
2007
2008        if (elf_coredump_extra_notes_write(file, &foffset))
2009                goto end_coredump;
2010
2011        /* Align to page */
2012        DUMP_SEEK(dataoff - foffset);
2013
2014        for (vma = first_vma(current, gate_vma); vma != NULL;
2015                        vma = next_vma(vma, gate_vma)) {
2016                unsigned long addr;
2017                unsigned long end;
2018
2019                end = vma->vm_start + vma_dump_size(vma, mm_flags);
2020
2021                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2022                        struct page *page;
2023                        struct vm_area_struct *tmp_vma;
2024
2025                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2026                                                &page, &tmp_vma) <= 0) {
2027                                DUMP_SEEK(PAGE_SIZE);
2028                        } else {
2029                                if (page == ZERO_PAGE(0)) {
2030                                        if (!dump_seek(file, PAGE_SIZE)) {
2031                                                page_cache_release(page);
2032                                                goto end_coredump;
2033                                        }
2034                                } else {
2035                                        void *kaddr;
2036                                        flush_cache_page(tmp_vma, addr,
2037                                                         page_to_pfn(page));
2038                                        kaddr = kmap(page);
2039                                        if ((size += PAGE_SIZE) > limit ||
2040                                            !dump_write(file, kaddr,
2041                                            PAGE_SIZE)) {
2042                                                kunmap(page);
2043                                                page_cache_release(page);
2044                                                goto end_coredump;
2045                                        }
2046                                        kunmap(page);
2047                                }
2048                                page_cache_release(page);
2049                        }
2050                }
2051        }
2052
2053#ifdef ELF_CORE_WRITE_EXTRA_DATA
2054        ELF_CORE_WRITE_EXTRA_DATA;
2055#endif
2056
2057end_coredump:
2058        set_fs(fs);
2059
2060cleanup:
2061        free_note_info(&info);
2062        kfree(elf);
2063out:
2064        return has_dumped;
2065}
2066
2067#endif          /* USE_ELF_CORE_DUMP */
2068
2069static int __init init_elf_binfmt(void)
2070{
2071        return register_binfmt(&elf_format);
2072}
2073
2074static void __exit exit_elf_binfmt(void)
2075{
2076        /* Remove the COFF and ELF loaders. */
2077        unregister_binfmt(&elf_format);
2078}
2079
2080core_initcall(init_elf_binfmt);
2081module_exit(exit_elf_binfmt);
2082MODULE_LICENSE("GPL");
2083
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.