linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/errno.h>
  20#include <linux/signal.h>
  21#include <linux/binfmts.h>
  22#include <linux/string.h>
  23#include <linux/file.h>
  24#include <linux/fcntl.h>
  25#include <linux/ptrace.h>
  26#include <linux/slab.h>
  27#include <linux/shm.h>
  28#include <linux/personality.h>
  29#include <linux/elfcore.h>
  30#include <linux/init.h>
  31#include <linux/highuid.h>
  32#include <linux/smp.h>
  33#include <linux/compiler.h>
  34#include <linux/highmem.h>
  35#include <linux/pagemap.h>
  36#include <linux/security.h>
  37#include <linux/syscalls.h>
  38#include <linux/random.h>
  39#include <linux/elf.h>
  40#include <linux/utsname.h>
  41#include <asm/uaccess.h>
  42#include <asm/param.h>
  43#include <asm/page.h>
  44
  45static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  46static int load_elf_library(struct file *);
  47static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  48                                int, int, unsigned long);
  49
  50/*
  51 * If we don't support core dumping, then supply a NULL so we
  52 * don't even try.
  53 */
  54#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  55static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
  56#else
  57#define elf_core_dump   NULL
  58#endif
  59
  60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  61#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  62#else
  63#define ELF_MIN_ALIGN   PAGE_SIZE
  64#endif
  65
  66#ifndef ELF_CORE_EFLAGS
  67#define ELF_CORE_EFLAGS 0
  68#endif
  69
  70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
  71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
  72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  73
  74static struct linux_binfmt elf_format = {
  75                .module         = THIS_MODULE,
  76                .load_binary    = load_elf_binary,
  77                .load_shlib     = load_elf_library,
  78                .core_dump      = elf_core_dump,
  79                .min_coredump   = ELF_EXEC_PAGESIZE,
  80                .hasvdso        = 1
  81};
  82
  83#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  84
  85static int set_brk(unsigned long start, unsigned long end)
  86{
  87        start = ELF_PAGEALIGN(start);
  88        end = ELF_PAGEALIGN(end);
  89        if (end > start) {
  90                unsigned long addr;
  91                down_write(&current->mm->mmap_sem);
  92                addr = do_brk(start, end - start);
  93                up_write(&current->mm->mmap_sem);
  94                if (BAD_ADDR(addr))
  95                        return addr;
  96        }
  97        current->mm->start_brk = current->mm->brk = end;
  98        return 0;
  99}
 100
 101/* We need to explicitly zero any fractional pages
 102   after the data section (i.e. bss).  This would
 103   contain the junk from the file that should not
 104   be in memory
 105 */
 106static int padzero(unsigned long elf_bss)
 107{
 108        unsigned long nbyte;
 109
 110        nbyte = ELF_PAGEOFFSET(elf_bss);
 111        if (nbyte) {
 112                nbyte = ELF_MIN_ALIGN - nbyte;
 113                if (clear_user((void __user *) elf_bss, nbyte))
 114                        return -EFAULT;
 115        }
 116        return 0;
 117}
 118
 119/* Let's use some macros to make this stack manipulation a little clearer */
 120#ifdef CONFIG_STACK_GROWSUP
 121#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 122#define STACK_ROUND(sp, items) \
 123        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
 124#define STACK_ALLOC(sp, len) ({ \
 125        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
 126        old_sp; })
 127#else
 128#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 129#define STACK_ROUND(sp, items) \
 130        (((unsigned long) (sp - items)) &~ 15UL)
 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 132#endif
 133
 134#ifndef ELF_BASE_PLATFORM
 135/*
 136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 138 * will be copied to the user stack in the same manner as AT_PLATFORM.
 139 */
 140#define ELF_BASE_PLATFORM NULL
 141#endif
 142
 143static int
 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 145                unsigned long load_addr, unsigned long interp_load_addr)
 146{
 147        unsigned long p = bprm->p;
 148        int argc = bprm->argc;
 149        int envc = bprm->envc;
 150        elf_addr_t __user *argv;
 151        elf_addr_t __user *envp;
 152        elf_addr_t __user *sp;
 153        elf_addr_t __user *u_platform;
 154        elf_addr_t __user *u_base_platform;
 155        elf_addr_t __user *u_rand_bytes;
 156        const char *k_platform = ELF_PLATFORM;
 157        const char *k_base_platform = ELF_BASE_PLATFORM;
 158        unsigned char k_rand_bytes[16];
 159        int items;
 160        elf_addr_t *elf_info;
 161        int ei_index = 0;
 162        const struct cred *cred = current_cred();
 163        struct vm_area_struct *vma;
 164
 165        /*
 166         * In some cases (e.g. Hyper-Threading), we want to avoid L1
 167         * evictions by the processes running on the same package. One
 168         * thing we can do is to shuffle the initial stack for them.
 169         */
 170
 171        p = arch_align_stack(p);
 172
 173        /*
 174         * If this architecture has a platform capability string, copy it
 175         * to userspace.  In some cases (Sparc), this info is impossible
 176         * for userspace to get any other way, in others (i386) it is
 177         * merely difficult.
 178         */
 179        u_platform = NULL;
 180        if (k_platform) {
 181                size_t len = strlen(k_platform) + 1;
 182
 183                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 184                if (__copy_to_user(u_platform, k_platform, len))
 185                        return -EFAULT;
 186        }
 187
 188        /*
 189         * If this architecture has a "base" platform capability
 190         * string, copy it to userspace.
 191         */
 192        u_base_platform = NULL;
 193        if (k_base_platform) {
 194                size_t len = strlen(k_base_platform) + 1;
 195
 196                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 197                if (__copy_to_user(u_base_platform, k_base_platform, len))
 198                        return -EFAULT;
 199        }
 200
 201        /*
 202         * Generate 16 random bytes for userspace PRNG seeding.
 203         */
 204        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
 205        u_rand_bytes = (elf_addr_t __user *)
 206                       STACK_ALLOC(p, sizeof(k_rand_bytes));
 207        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
 208                return -EFAULT;
 209
 210        /* Create the ELF interpreter info */
 211        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 212        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
 213#define NEW_AUX_ENT(id, val) \
 214        do { \
 215                elf_info[ei_index++] = id; \
 216                elf_info[ei_index++] = val; \
 217        } while (0)
 218
 219#ifdef ARCH_DLINFO
 220        /* 
 221         * ARCH_DLINFO must come first so PPC can do its special alignment of
 222         * AUXV.
 223         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
 224         * ARCH_DLINFO changes
 225         */
 226        ARCH_DLINFO;
 227#endif
 228        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 229        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 230        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 231        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 232        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 233        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 234        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 235        NEW_AUX_ENT(AT_FLAGS, 0);
 236        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 237        NEW_AUX_ENT(AT_UID, cred->uid);
 238        NEW_AUX_ENT(AT_EUID, cred->euid);
 239        NEW_AUX_ENT(AT_GID, cred->gid);
 240        NEW_AUX_ENT(AT_EGID, cred->egid);
 241        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 242        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
 243        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 244        if (k_platform) {
 245                NEW_AUX_ENT(AT_PLATFORM,
 246                            (elf_addr_t)(unsigned long)u_platform);
 247        }
 248        if (k_base_platform) {
 249                NEW_AUX_ENT(AT_BASE_PLATFORM,
 250                            (elf_addr_t)(unsigned long)u_base_platform);
 251        }
 252        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 253                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 254        }
 255#undef NEW_AUX_ENT
 256        /* AT_NULL is zero; clear the rest too */
 257        memset(&elf_info[ei_index], 0,
 258               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 259
 260        /* And advance past the AT_NULL entry.  */
 261        ei_index += 2;
 262
 263        sp = STACK_ADD(p, ei_index);
 264
 265        items = (argc + 1) + (envc + 1) + 1;
 266        bprm->p = STACK_ROUND(sp, items);
 267
 268        /* Point sp at the lowest address on the stack */
 269#ifdef CONFIG_STACK_GROWSUP
 270        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 271        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 272#else
 273        sp = (elf_addr_t __user *)bprm->p;
 274#endif
 275
 276
 277        /*
 278         * Grow the stack manually; some architectures have a limit on how
 279         * far ahead a user-space access may be in order to grow the stack.
 280         */
 281        vma = find_extend_vma(current->mm, bprm->p);
 282        if (!vma)
 283                return -EFAULT;
 284
 285        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 286        if (__put_user(argc, sp++))
 287                return -EFAULT;
 288        argv = sp;
 289        envp = argv + argc + 1;
 290
 291        /* Populate argv and envp */
 292        p = current->mm->arg_end = current->mm->arg_start;
 293        while (argc-- > 0) {
 294                size_t len;
 295                if (__put_user((elf_addr_t)p, argv++))
 296                        return -EFAULT;
 297                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 298                if (!len || len > MAX_ARG_STRLEN)
 299                        return -EINVAL;
 300                p += len;
 301        }
 302        if (__put_user(0, argv))
 303                return -EFAULT;
 304        current->mm->arg_end = current->mm->env_start = p;
 305        while (envc-- > 0) {
 306                size_t len;
 307                if (__put_user((elf_addr_t)p, envp++))
 308                        return -EFAULT;
 309                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 310                if (!len || len > MAX_ARG_STRLEN)
 311                        return -EINVAL;
 312                p += len;
 313        }
 314        if (__put_user(0, envp))
 315                return -EFAULT;
 316        current->mm->env_end = p;
 317
 318        /* Put the elf_info on the stack in the right place.  */
 319        sp = (elf_addr_t __user *)envp + 1;
 320        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 321                return -EFAULT;
 322        return 0;
 323}
 324
 325#ifndef elf_map
 326
 327static unsigned long elf_map(struct file *filep, unsigned long addr,
 328                struct elf_phdr *eppnt, int prot, int type,
 329                unsigned long total_size)
 330{
 331        unsigned long map_addr;
 332        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 333        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
 334        addr = ELF_PAGESTART(addr);
 335        size = ELF_PAGEALIGN(size);
 336
 337        /* mmap() will return -EINVAL if given a zero size, but a
 338         * segment with zero filesize is perfectly valid */
 339        if (!size)
 340                return addr;
 341
 342        down_write(&current->mm->mmap_sem);
 343        /*
 344        * total_size is the size of the ELF (interpreter) image.
 345        * The _first_ mmap needs to know the full size, otherwise
 346        * randomization might put this image into an overlapping
 347        * position with the ELF binary image. (since size < total_size)
 348        * So we first map the 'big' image - and unmap the remainder at
 349        * the end. (which unmap is needed for ELF images with holes.)
 350        */
 351        if (total_size) {
 352                total_size = ELF_PAGEALIGN(total_size);
 353                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
 354                if (!BAD_ADDR(map_addr))
 355                        do_munmap(current->mm, map_addr+size, total_size-size);
 356        } else
 357                map_addr = do_mmap(filep, addr, size, prot, type, off);
 358
 359        up_write(&current->mm->mmap_sem);
 360        return(map_addr);
 361}
 362
 363#endif /* !elf_map */
 364
 365static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 366{
 367        int i, first_idx = -1, last_idx = -1;
 368
 369        for (i = 0; i < nr; i++) {
 370                if (cmds[i].p_type == PT_LOAD) {
 371                        last_idx = i;
 372                        if (first_idx == -1)
 373                                first_idx = i;
 374                }
 375        }
 376        if (first_idx == -1)
 377                return 0;
 378
 379        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 380                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
 381}
 382
 383
 384/* This is much more generalized than the library routine read function,
 385   so we keep this separate.  Technically the library read function
 386   is only provided so that we can read a.out libraries that have
 387   an ELF header */
 388
 389static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 390                struct file *interpreter, unsigned long *interp_map_addr,
 391                unsigned long no_base)
 392{
 393        struct elf_phdr *elf_phdata;
 394        struct elf_phdr *eppnt;
 395        unsigned long load_addr = 0;
 396        int load_addr_set = 0;
 397        unsigned long last_bss = 0, elf_bss = 0;
 398        unsigned long error = ~0UL;
 399        unsigned long total_size;
 400        int retval, i, size;
 401
 402        /* First of all, some simple consistency checks */
 403        if (interp_elf_ex->e_type != ET_EXEC &&
 404            interp_elf_ex->e_type != ET_DYN)
 405                goto out;
 406        if (!elf_check_arch(interp_elf_ex))
 407                goto out;
 408        if (!interpreter->f_op || !interpreter->f_op->mmap)
 409                goto out;
 410
 411        /*
 412         * If the size of this structure has changed, then punt, since
 413         * we will be doing the wrong thing.
 414         */
 415        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 416                goto out;
 417        if (interp_elf_ex->e_phnum < 1 ||
 418                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 419                goto out;
 420
 421        /* Now read in all of the header information */
 422        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 423        if (size > ELF_MIN_ALIGN)
 424                goto out;
 425        elf_phdata = kmalloc(size, GFP_KERNEL);
 426        if (!elf_phdata)
 427                goto out;
 428
 429        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 430                             (char *)elf_phdata,size);
 431        error = -EIO;
 432        if (retval != size) {
 433                if (retval < 0)
 434                        error = retval; 
 435                goto out_close;
 436        }
 437
 438        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
 439        if (!total_size) {
 440                error = -EINVAL;
 441                goto out_close;
 442        }
 443
 444        eppnt = elf_phdata;
 445        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 446                if (eppnt->p_type == PT_LOAD) {
 447                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 448                        int elf_prot = 0;
 449                        unsigned long vaddr = 0;
 450                        unsigned long k, map_addr;
 451
 452                        if (eppnt->p_flags & PF_R)
 453                                elf_prot = PROT_READ;
 454                        if (eppnt->p_flags & PF_W)
 455                                elf_prot |= PROT_WRITE;
 456                        if (eppnt->p_flags & PF_X)
 457                                elf_prot |= PROT_EXEC;
 458                        vaddr = eppnt->p_vaddr;
 459                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 460                                elf_type |= MAP_FIXED;
 461                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
 462                                load_addr = -vaddr;
 463
 464                        map_addr = elf_map(interpreter, load_addr + vaddr,
 465                                        eppnt, elf_prot, elf_type, total_size);
 466                        total_size = 0;
 467                        if (!*interp_map_addr)
 468                                *interp_map_addr = map_addr;
 469                        error = map_addr;
 470                        if (BAD_ADDR(map_addr))
 471                                goto out_close;
 472
 473                        if (!load_addr_set &&
 474                            interp_elf_ex->e_type == ET_DYN) {
 475                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 476                                load_addr_set = 1;
 477                        }
 478
 479                        /*
 480                         * Check to see if the section's size will overflow the
 481                         * allowed task size. Note that p_filesz must always be
 482                         * <= p_memsize so it's only necessary to check p_memsz.
 483                         */
 484                        k = load_addr + eppnt->p_vaddr;
 485                        if (BAD_ADDR(k) ||
 486                            eppnt->p_filesz > eppnt->p_memsz ||
 487                            eppnt->p_memsz > TASK_SIZE ||
 488                            TASK_SIZE - eppnt->p_memsz < k) {
 489                                error = -ENOMEM;
 490                                goto out_close;
 491                        }
 492
 493                        /*
 494                         * Find the end of the file mapping for this phdr, and
 495                         * keep track of the largest address we see for this.
 496                         */
 497                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 498                        if (k > elf_bss)
 499                                elf_bss = k;
 500
 501                        /*
 502                         * Do the same thing for the memory mapping - between
 503                         * elf_bss and last_bss is the bss section.
 504                         */
 505                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 506                        if (k > last_bss)
 507                                last_bss = k;
 508                }
 509        }
 510
 511        /*
 512         * Now fill out the bss section.  First pad the last page up
 513         * to the page boundary, and then perform a mmap to make sure
 514         * that there are zero-mapped pages up to and including the 
 515         * last bss page.
 516         */
 517        if (padzero(elf_bss)) {
 518                error = -EFAULT;
 519                goto out_close;
 520        }
 521
 522        /* What we have mapped so far */
 523        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 524
 525        /* Map the last of the bss segment */
 526        if (last_bss > elf_bss) {
 527                down_write(&current->mm->mmap_sem);
 528                error = do_brk(elf_bss, last_bss - elf_bss);
 529                up_write(&current->mm->mmap_sem);
 530                if (BAD_ADDR(error))
 531                        goto out_close;
 532        }
 533
 534        error = load_addr;
 535
 536out_close:
 537        kfree(elf_phdata);
 538out:
 539        return error;
 540}
 541
 542/*
 543 * These are the functions used to load ELF style executables and shared
 544 * libraries.  There is no binary dependent code anywhere else.
 545 */
 546
 547#define INTERPRETER_NONE 0
 548#define INTERPRETER_ELF 2
 549
 550#ifndef STACK_RND_MASK
 551#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
 552#endif
 553
 554static unsigned long randomize_stack_top(unsigned long stack_top)
 555{
 556        unsigned int random_variable = 0;
 557
 558        if ((current->flags & PF_RANDOMIZE) &&
 559                !(current->personality & ADDR_NO_RANDOMIZE)) {
 560                random_variable = get_random_int() & STACK_RND_MASK;
 561                random_variable <<= PAGE_SHIFT;
 562        }
 563#ifdef CONFIG_STACK_GROWSUP
 564        return PAGE_ALIGN(stack_top) + random_variable;
 565#else
 566        return PAGE_ALIGN(stack_top) - random_variable;
 567#endif
 568}
 569
 570static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 571{
 572        struct file *interpreter = NULL; /* to shut gcc up */
 573        unsigned long load_addr = 0, load_bias = 0;
 574        int load_addr_set = 0;
 575        char * elf_interpreter = NULL;
 576        unsigned long error;
 577        struct elf_phdr *elf_ppnt, *elf_phdata;
 578        unsigned long elf_bss, elf_brk;
 579        int elf_exec_fileno;
 580        int retval, i;
 581        unsigned int size;
 582        unsigned long elf_entry;
 583        unsigned long interp_load_addr = 0;
 584        unsigned long start_code, end_code, start_data, end_data;
 585        unsigned long reloc_func_desc = 0;
 586        int executable_stack = EXSTACK_DEFAULT;
 587        unsigned long def_flags = 0;
 588        struct {
 589                struct elfhdr elf_ex;
 590                struct elfhdr interp_elf_ex;
 591        } *loc;
 592
 593        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 594        if (!loc) {
 595                retval = -ENOMEM;
 596                goto out_ret;
 597        }
 598        
 599        /* Get the exec-header */
 600        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 601
 602        retval = -ENOEXEC;
 603        /* First of all, some simple consistency checks */
 604        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 605                goto out;
 606
 607        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 608                goto out;
 609        if (!elf_check_arch(&loc->elf_ex))
 610                goto out;
 611        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 612                goto out;
 613
 614        /* Now read in all of the header information */
 615        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 616                goto out;
 617        if (loc->elf_ex.e_phnum < 1 ||
 618                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 619                goto out;
 620        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 621        retval = -ENOMEM;
 622        elf_phdata = kmalloc(size, GFP_KERNEL);
 623        if (!elf_phdata)
 624                goto out;
 625
 626        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 627                             (char *)elf_phdata, size);
 628        if (retval != size) {
 629                if (retval >= 0)
 630                        retval = -EIO;
 631                goto out_free_ph;
 632        }
 633
 634        retval = get_unused_fd();
 635        if (retval < 0)
 636                goto out_free_ph;
 637        get_file(bprm->file);
 638        fd_install(elf_exec_fileno = retval, bprm->file);
 639
 640        elf_ppnt = elf_phdata;
 641        elf_bss = 0;
 642        elf_brk = 0;
 643
 644        start_code = ~0UL;
 645        end_code = 0;
 646        start_data = 0;
 647        end_data = 0;
 648
 649        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 650                if (elf_ppnt->p_type == PT_INTERP) {
 651                        /* This is the program interpreter used for
 652                         * shared libraries - for now assume that this
 653                         * is an a.out format binary
 654                         */
 655                        retval = -ENOEXEC;
 656                        if (elf_ppnt->p_filesz > PATH_MAX || 
 657                            elf_ppnt->p_filesz < 2)
 658                                goto out_free_file;
 659
 660                        retval = -ENOMEM;
 661                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 662                                                  GFP_KERNEL);
 663                        if (!elf_interpreter)
 664                                goto out_free_file;
 665
 666                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 667                                             elf_interpreter,
 668                                             elf_ppnt->p_filesz);
 669                        if (retval != elf_ppnt->p_filesz) {
 670                                if (retval >= 0)
 671                                        retval = -EIO;
 672                                goto out_free_interp;
 673                        }
 674                        /* make sure path is NULL terminated */
 675                        retval = -ENOEXEC;
 676                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 677                                goto out_free_interp;
 678
 679                        /*
 680                         * The early SET_PERSONALITY here is so that the lookup
 681                         * for the interpreter happens in the namespace of the 
 682                         * to-be-execed image.  SET_PERSONALITY can select an
 683                         * alternate root.
 684                         *
 685                         * However, SET_PERSONALITY is NOT allowed to switch
 686                         * this task into the new images's memory mapping
 687                         * policy - that is, TASK_SIZE must still evaluate to
 688                         * that which is appropriate to the execing application.
 689                         * This is because exit_mmap() needs to have TASK_SIZE
 690                         * evaluate to the size of the old image.
 691                         *
 692                         * So if (say) a 64-bit application is execing a 32-bit
 693                         * application it is the architecture's responsibility
 694                         * to defer changing the value of TASK_SIZE until the
 695                         * switch really is going to happen - do this in
 696                         * flush_thread().      - akpm
 697                         */
 698                        SET_PERSONALITY(loc->elf_ex);
 699
 700                        interpreter = open_exec(elf_interpreter);
 701                        retval = PTR_ERR(interpreter);
 702                        if (IS_ERR(interpreter))
 703                                goto out_free_interp;
 704
 705                        /*
 706                         * If the binary is not readable then enforce
 707                         * mm->dumpable = 0 regardless of the interpreter's
 708                         * permissions.
 709                         */
 710                        if (file_permission(interpreter, MAY_READ) < 0)
 711                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 712
 713                        retval = kernel_read(interpreter, 0, bprm->buf,
 714                                             BINPRM_BUF_SIZE);
 715                        if (retval != BINPRM_BUF_SIZE) {
 716                                if (retval >= 0)
 717                                        retval = -EIO;
 718                                goto out_free_dentry;
 719                        }
 720
 721                        /* Get the exec headers */
 722                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 723                        break;
 724                }
 725                elf_ppnt++;
 726        }
 727
 728        elf_ppnt = elf_phdata;
 729        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 730                if (elf_ppnt->p_type == PT_GNU_STACK) {
 731                        if (elf_ppnt->p_flags & PF_X)
 732                                executable_stack = EXSTACK_ENABLE_X;
 733                        else
 734                                executable_stack = EXSTACK_DISABLE_X;
 735                        break;
 736                }
 737
 738        /* Some simple consistency checks for the interpreter */
 739        if (elf_interpreter) {
 740                retval = -ELIBBAD;
 741                /* Not an ELF interpreter */
 742                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 743                        goto out_free_dentry;
 744                /* Verify the interpreter has a valid arch */
 745                if (!elf_check_arch(&loc->interp_elf_ex))
 746                        goto out_free_dentry;
 747        } else {
 748                /* Executables without an interpreter also need a personality  */
 749                SET_PERSONALITY(loc->elf_ex);
 750        }
 751
 752        /* Flush all traces of the currently running executable */
 753        retval = flush_old_exec(bprm);
 754        if (retval)
 755                goto out_free_dentry;
 756
 757        /* OK, This is the point of no return */
 758        current->flags &= ~PF_FORKNOEXEC;
 759        current->mm->def_flags = def_flags;
 760
 761        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 762           may depend on the personality.  */
 763        SET_PERSONALITY(loc->elf_ex);
 764        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 765                current->personality |= READ_IMPLIES_EXEC;
 766
 767        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 768                current->flags |= PF_RANDOMIZE;
 769        arch_pick_mmap_layout(current->mm);
 770
 771        /* Do this so that we can load the interpreter, if need be.  We will
 772           change some of these later */
 773        current->mm->free_area_cache = current->mm->mmap_base;
 774        current->mm->cached_hole_size = 0;
 775        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 776                                 executable_stack);
 777        if (retval < 0) {
 778                send_sig(SIGKILL, current, 0);
 779                goto out_free_dentry;
 780        }
 781        
 782        current->mm->start_stack = bprm->p;
 783
 784        /* Now we do a little grungy work by mmaping the ELF image into
 785           the correct location in memory. */
 786        for(i = 0, elf_ppnt = elf_phdata;
 787            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 788                int elf_prot = 0, elf_flags;
 789                unsigned long k, vaddr;
 790
 791                if (elf_ppnt->p_type != PT_LOAD)
 792                        continue;
 793
 794                if (unlikely (elf_brk > elf_bss)) {
 795                        unsigned long nbyte;
 796                    
 797                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 798                           before this one. Map anonymous pages, if needed,
 799                           and clear the area.  */
 800                        retval = set_brk (elf_bss + load_bias,
 801                                          elf_brk + load_bias);
 802                        if (retval) {
 803                                send_sig(SIGKILL, current, 0);
 804                                goto out_free_dentry;
 805                        }
 806                        nbyte = ELF_PAGEOFFSET(elf_bss);
 807                        if (nbyte) {
 808                                nbyte = ELF_MIN_ALIGN - nbyte;
 809                                if (nbyte > elf_brk - elf_bss)
 810                                        nbyte = elf_brk - elf_bss;
 811                                if (clear_user((void __user *)elf_bss +
 812                                                        load_bias, nbyte)) {
 813                                        /*
 814                                         * This bss-zeroing can fail if the ELF
 815                                         * file specifies odd protections. So
 816                                         * we don't check the return value
 817                                         */
 818                                }
 819                        }
 820                }
 821
 822                if (elf_ppnt->p_flags & PF_R)
 823                        elf_prot |= PROT_READ;
 824                if (elf_ppnt->p_flags & PF_W)
 825                        elf_prot |= PROT_WRITE;
 826                if (elf_ppnt->p_flags & PF_X)
 827                        elf_prot |= PROT_EXEC;
 828
 829                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 830
 831                vaddr = elf_ppnt->p_vaddr;
 832                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 833                        elf_flags |= MAP_FIXED;
 834                } else if (loc->elf_ex.e_type == ET_DYN) {
 835                        /* Try and get dynamic programs out of the way of the
 836                         * default mmap base, as well as whatever program they
 837                         * might try to exec.  This is because the brk will
 838                         * follow the loader, and is not movable.  */
 839#ifdef CONFIG_X86
 840                        load_bias = 0;
 841#else
 842                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 843#endif
 844                }
 845
 846                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 847                                elf_prot, elf_flags, 0);
 848                if (BAD_ADDR(error)) {
 849                        send_sig(SIGKILL, current, 0);
 850                        retval = IS_ERR((void *)error) ?
 851                                PTR_ERR((void*)error) : -EINVAL;
 852                        goto out_free_dentry;
 853                }
 854
 855                if (!load_addr_set) {
 856                        load_addr_set = 1;
 857                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 858                        if (loc->elf_ex.e_type == ET_DYN) {
 859                                load_bias += error -
 860                                             ELF_PAGESTART(load_bias + vaddr);
 861                                load_addr += load_bias;
 862                                reloc_func_desc = load_bias;
 863                        }
 864                }
 865                k = elf_ppnt->p_vaddr;
 866                if (k < start_code)
 867                        start_code = k;
 868                if (start_data < k)
 869                        start_data = k;
 870
 871                /*
 872                 * Check to see if the section's size will overflow the
 873                 * allowed task size. Note that p_filesz must always be
 874                 * <= p_memsz so it is only necessary to check p_memsz.
 875                 */
 876                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 877                    elf_ppnt->p_memsz > TASK_SIZE ||
 878                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 879                        /* set_brk can never work. Avoid overflows. */
 880                        send_sig(SIGKILL, current, 0);
 881                        retval = -EINVAL;
 882                        goto out_free_dentry;
 883                }
 884
 885                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 886
 887                if (k > elf_bss)
 888                        elf_bss = k;
 889                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 890                        end_code = k;
 891                if (end_data < k)
 892                        end_data = k;
 893                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 894                if (k > elf_brk)
 895                        elf_brk = k;
 896        }
 897
 898        loc->elf_ex.e_entry += load_bias;
 899        elf_bss += load_bias;
 900        elf_brk += load_bias;
 901        start_code += load_bias;
 902        end_code += load_bias;
 903        start_data += load_bias;
 904        end_data += load_bias;
 905
 906        /* Calling set_brk effectively mmaps the pages that we need
 907         * for the bss and break sections.  We must do this before
 908         * mapping in the interpreter, to make sure it doesn't wind
 909         * up getting placed where the bss needs to go.
 910         */
 911        retval = set_brk(elf_bss, elf_brk);
 912        if (retval) {
 913                send_sig(SIGKILL, current, 0);
 914                goto out_free_dentry;
 915        }
 916        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 917                send_sig(SIGSEGV, current, 0);
 918                retval = -EFAULT; /* Nobody gets to see this, but.. */
 919                goto out_free_dentry;
 920        }
 921
 922        if (elf_interpreter) {
 923                unsigned long uninitialized_var(interp_map_addr);
 924
 925                elf_entry = load_elf_interp(&loc->interp_elf_ex,
 926                                            interpreter,
 927                                            &interp_map_addr,
 928                                            load_bias);
 929                if (!IS_ERR((void *)elf_entry)) {
 930                        /*
 931                         * load_elf_interp() returns relocation
 932                         * adjustment
 933                         */
 934                        interp_load_addr = elf_entry;
 935                        elf_entry += loc->interp_elf_ex.e_entry;
 936                }
 937                if (BAD_ADDR(elf_entry)) {
 938                        force_sig(SIGSEGV, current);
 939                        retval = IS_ERR((void *)elf_entry) ?
 940                                        (int)elf_entry : -EINVAL;
 941                        goto out_free_dentry;
 942                }
 943                reloc_func_desc = interp_load_addr;
 944
 945                allow_write_access(interpreter);
 946                fput(interpreter);
 947                kfree(elf_interpreter);
 948        } else {
 949                elf_entry = loc->elf_ex.e_entry;
 950                if (BAD_ADDR(elf_entry)) {
 951                        force_sig(SIGSEGV, current);
 952                        retval = -EINVAL;
 953                        goto out_free_dentry;
 954                }
 955        }
 956
 957        kfree(elf_phdata);
 958
 959        sys_close(elf_exec_fileno);
 960
 961        set_binfmt(&elf_format);
 962
 963#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 964        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
 965        if (retval < 0) {
 966                send_sig(SIGKILL, current, 0);
 967                goto out;
 968        }
 969#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 970
 971        install_exec_creds(bprm);
 972        current->flags &= ~PF_FORKNOEXEC;
 973        retval = create_elf_tables(bprm, &loc->elf_ex,
 974                          load_addr, interp_load_addr);
 975        if (retval < 0) {
 976                send_sig(SIGKILL, current, 0);
 977                goto out;
 978        }
 979        /* N.B. passed_fileno might not be initialized? */
 980        current->mm->end_code = end_code;
 981        current->mm->start_code = start_code;
 982        current->mm->start_data = start_data;
 983        current->mm->end_data = end_data;
 984        current->mm->start_stack = bprm->p;
 985
 986#ifdef arch_randomize_brk
 987        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
 988                current->mm->brk = current->mm->start_brk =
 989                        arch_randomize_brk(current->mm);
 990#endif
 991
 992        if (current->personality & MMAP_PAGE_ZERO) {
 993                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 994                   and some applications "depend" upon this behavior.
 995                   Since we do not have the power to recompile these, we
 996                   emulate the SVr4 behavior. Sigh. */
 997                down_write(&current->mm->mmap_sem);
 998                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 999                                MAP_FIXED | MAP_PRIVATE, 0);
1000                up_write(&current->mm->mmap_sem);
1001        }
1002
1003#ifdef ELF_PLAT_INIT
1004        /*
1005         * The ABI may specify that certain registers be set up in special
1006         * ways (on i386 %edx is the address of a DT_FINI function, for
1007         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1008         * that the e_entry field is the address of the function descriptor
1009         * for the startup routine, rather than the address of the startup
1010         * routine itself.  This macro performs whatever initialization to
1011         * the regs structure is required as well as any relocations to the
1012         * function descriptor entries when executing dynamically links apps.
1013         */
1014        ELF_PLAT_INIT(regs, reloc_func_desc);
1015#endif
1016
1017        start_thread(regs, elf_entry, bprm->p);
1018        retval = 0;
1019out:
1020        kfree(loc);
1021out_ret:
1022        return retval;
1023
1024        /* error cleanup */
1025out_free_dentry:
1026        allow_write_access(interpreter);
1027        if (interpreter)
1028                fput(interpreter);
1029out_free_interp:
1030        kfree(elf_interpreter);
1031out_free_file:
1032        sys_close(elf_exec_fileno);
1033out_free_ph:
1034        kfree(elf_phdata);
1035        goto out;
1036}
1037
1038/* This is really simpleminded and specialized - we are loading an
1039   a.out library that is given an ELF header. */
1040static int load_elf_library(struct file *file)
1041{
1042        struct elf_phdr *elf_phdata;
1043        struct elf_phdr *eppnt;
1044        unsigned long elf_bss, bss, len;
1045        int retval, error, i, j;
1046        struct elfhdr elf_ex;
1047
1048        error = -ENOEXEC;
1049        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1050        if (retval != sizeof(elf_ex))
1051                goto out;
1052
1053        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1054                goto out;
1055
1056        /* First of all, some simple consistency checks */
1057        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1058            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1059                goto out;
1060
1061        /* Now read in all of the header information */
1062
1063        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1064        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1065
1066        error = -ENOMEM;
1067        elf_phdata = kmalloc(j, GFP_KERNEL);
1068        if (!elf_phdata)
1069                goto out;
1070
1071        eppnt = elf_phdata;
1072        error = -ENOEXEC;
1073        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1074        if (retval != j)
1075                goto out_free_ph;
1076
1077        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1078                if ((eppnt + i)->p_type == PT_LOAD)
1079                        j++;
1080        if (j != 1)
1081                goto out_free_ph;
1082
1083        while (eppnt->p_type != PT_LOAD)
1084                eppnt++;
1085
1086        /* Now use mmap to map the library into memory. */
1087        down_write(&current->mm->mmap_sem);
1088        error = do_mmap(file,
1089                        ELF_PAGESTART(eppnt->p_vaddr),
1090                        (eppnt->p_filesz +
1091                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1092                        PROT_READ | PROT_WRITE | PROT_EXEC,
1093                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1094                        (eppnt->p_offset -
1095                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1096        up_write(&current->mm->mmap_sem);
1097        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1098                goto out_free_ph;
1099
1100        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1101        if (padzero(elf_bss)) {
1102                error = -EFAULT;
1103                goto out_free_ph;
1104        }
1105
1106        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1107                            ELF_MIN_ALIGN - 1);
1108        bss = eppnt->p_memsz + eppnt->p_vaddr;
1109        if (bss > len) {
1110                down_write(&current->mm->mmap_sem);
1111                do_brk(len, bss - len);
1112                up_write(&current->mm->mmap_sem);
1113        }
1114        error = 0;
1115
1116out_free_ph:
1117        kfree(elf_phdata);
1118out:
1119        return error;
1120}
1121
1122/*
1123 * Note that some platforms still use traditional core dumps and not
1124 * the ELF core dump.  Each platform can select it as appropriate.
1125 */
1126#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1127
1128/*
1129 * ELF core dumper
1130 *
1131 * Modelled on fs/exec.c:aout_core_dump()
1132 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1133 */
1134/*
1135 * These are the only things you should do on a core-file: use only these
1136 * functions to write out all the necessary info.
1137 */
1138static int dump_write(struct file *file, const void *addr, int nr)
1139{
1140        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1141}
1142
1143static int dump_seek(struct file *file, loff_t off)
1144{
1145        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1146                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1147                        return 0;
1148        } else {
1149                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1150                if (!buf)
1151                        return 0;
1152                while (off > 0) {
1153                        unsigned long n = off;
1154                        if (n > PAGE_SIZE)
1155                                n = PAGE_SIZE;
1156                        if (!dump_write(file, buf, n))
1157                                return 0;
1158                        off -= n;
1159                }
1160                free_page((unsigned long)buf);
1161        }
1162        return 1;
1163}
1164
1165/*
1166 * Decide what to dump of a segment, part, all or none.
1167 */
1168static unsigned long vma_dump_size(struct vm_area_struct *vma,
1169                                   unsigned long mm_flags)
1170{
1171#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1172
1173        /* The vma can be set up to tell us the answer directly.  */
1174        if (vma->vm_flags & VM_ALWAYSDUMP)
1175                goto whole;
1176
1177        /* Hugetlb memory check */
1178        if (vma->vm_flags & VM_HUGETLB) {
1179                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1180                        goto whole;
1181                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1182                        goto whole;
1183        }
1184
1185        /* Do not dump I/O mapped devices or special mappings */
1186        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1187                return 0;
1188
1189        /* By default, dump shared memory if mapped from an anonymous file. */
1190        if (vma->vm_flags & VM_SHARED) {
1191                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1192                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1193                        goto whole;
1194                return 0;
1195        }
1196
1197        /* Dump segments that have been written to.  */
1198        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1199                goto whole;
1200        if (vma->vm_file == NULL)
1201                return 0;
1202
1203        if (FILTER(MAPPED_PRIVATE))
1204                goto whole;
1205
1206        /*
1207         * If this looks like the beginning of a DSO or executable mapping,
1208         * check for an ELF header.  If we find one, dump the first page to
1209         * aid in determining what was mapped here.
1210         */
1211        if (FILTER(ELF_HEADERS) &&
1212            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1213                u32 __user *header = (u32 __user *) vma->vm_start;
1214                u32 word;
1215                mm_segment_t fs = get_fs();
1216                /*
1217                 * Doing it this way gets the constant folded by GCC.
1218                 */
1219                union {
1220                        u32 cmp;
1221                        char elfmag[SELFMAG];
1222                } magic;
1223                BUILD_BUG_ON(SELFMAG != sizeof word);
1224                magic.elfmag[EI_MAG0] = ELFMAG0;
1225                magic.elfmag[EI_MAG1] = ELFMAG1;
1226                magic.elfmag[EI_MAG2] = ELFMAG2;
1227                magic.elfmag[EI_MAG3] = ELFMAG3;
1228                /*
1229                 * Switch to the user "segment" for get_user(),
1230                 * then put back what elf_core_dump() had in place.
1231                 */
1232                set_fs(USER_DS);
1233                if (unlikely(get_user(word, header)))
1234                        word = 0;
1235                set_fs(fs);
1236                if (word == magic.cmp)
1237                        return PAGE_SIZE;
1238        }
1239
1240#undef  FILTER
1241
1242        return 0;
1243
1244whole:
1245        return vma->vm_end - vma->vm_start;
1246}
1247
/*
 * An ELF note held in memory, before it is serialised into the core file.
 * The payload is referenced, not copied.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, written out as n_type */
	unsigned int datasz;	/* payload length in bytes (n_descsz) */
	void *data;		/* payload bytes */
};
1256
1257static int notesize(struct memelfnote *en)
1258{
1259        int sz;
1260
1261        sz = sizeof(struct elf_note);
1262        sz += roundup(strlen(en->name) + 1, 4);
1263        sz += roundup(en->datasz, 4);
1264
1265        return sz;
1266}
1267
1268#define DUMP_WRITE(addr, nr, foffset)   \
1269        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1270
1271static int alignfile(struct file *file, loff_t *foffset)
1272{
1273        static const char buf[4] = { 0, };
1274        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1275        return 1;
1276}
1277
1278static int writenote(struct memelfnote *men, struct file *file,
1279                        loff_t *foffset)
1280{
1281        struct elf_note en;
1282        en.n_namesz = strlen(men->name) + 1;
1283        en.n_descsz = men->datasz;
1284        en.n_type = men->type;
1285
1286        DUMP_WRITE(&en, sizeof(en), foffset);
1287        DUMP_WRITE(men->name, en.n_namesz, foffset);
1288        if (!alignfile(file, foffset))
1289                return 0;
1290        DUMP_WRITE(men->data, men->datasz, foffset);
1291        if (!alignfile(file, foffset))
1292                return 0;
1293
1294        return 1;
1295}
1296#undef DUMP_WRITE
1297
/*
 * Writer helpers for a function that provides `file', `size' and `limit'
 * variables and an `end_coredump' label (presumably elf_core_dump();
 * the user is not visible from here).
 *
 * DUMP_WRITE adds nr to `size' first; if the new total exceeds `limit'
 * nothing is written and control jumps to end_coredump, as it also does
 * when the write itself fails.  DUMP_SEEK jumps there when dump_seek()
 * fails.
 *
 * Both expand to a complete, unbraced `if' statement that already ends in
 * a semicolon, so they must not be placed directly before an `else'.
 */
#define DUMP_WRITE(addr, nr)	\
	if (!((size += (nr)) <= limit && dump_write(file, (addr), (nr)))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (dump_seek(file, (off)) == 0) \
		goto end_coredump;
1304
1305static void fill_elf_header(struct elfhdr *elf, int segs,
1306                            u16 machine, u32 flags, u8 osabi)
1307{
1308        memset(elf, 0, sizeof(*elf));
1309
1310        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1311        elf->e_ident[EI_CLASS] = ELF_CLASS;
1312        elf->e_ident[EI_DATA] = ELF_DATA;
1313        elf->e_ident[EI_VERSION] = EV_CURRENT;
1314        elf->e_ident[EI_OSABI] = ELF_OSABI;
1315
1316        elf->e_type = ET_CORE;
1317        elf->e_machine = machine;
1318        elf->e_version = EV_CURRENT;
1319        elf->e_phoff = sizeof(struct elfhdr);
1320        elf->e_flags = flags;
1321        elf->e_ehsize = sizeof(struct elfhdr);
1322        elf->e_phentsize = sizeof(struct elf_phdr);
1323        elf->e_phnum = segs;
1324
1325        return;
1326}
1327
1328static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1329{
1330        phdr->p_type = PT_NOTE;
1331        phdr->p_offset = offset;
1332        phdr->p_vaddr = 0;
1333        phdr->p_paddr = 0;
1334        phdr->p_filesz = sz;
1335        phdr->p_memsz = 0;
1336        phdr->p_flags = 0;
1337        phdr->p_align = 0;
1338        return;
1339}
1340
1341static void fill_note(struct memelfnote *note, const char *name, int type, 
1342                unsigned int sz, void *data)
1343{
1344        note->name = name;
1345        note->type = type;
1346        note->datasz = sz;
1347        note->data = data;
1348        return;
1349}
1350
1351/*
1352 * fill up all the fields in prstatus from the given task struct, except
1353 * registers which need to be filled up separately.
1354 */
1355static void fill_prstatus(struct elf_prstatus *prstatus,
1356                struct task_struct *p, long signr)
1357{
1358        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1359        prstatus->pr_sigpend = p->pending.signal.sig[0];
1360        prstatus->pr_sighold = p->blocked.sig[0];
1361        prstatus->pr_pid = task_pid_vnr(p);
1362        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1363        prstatus->pr_pgrp = task_pgrp_vnr(p);
1364        prstatus->pr_sid = task_session_vnr(p);
1365        if (thread_group_leader(p)) {
1366                struct task_cputime cputime;
1367
1368                /*
1369                 * This is the record for the group leader.  It shows the
1370                 * group-wide total, not its individual thread total.
1371                 */
1372                thread_group_cputime(p, &cputime);
1373                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1374                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1375        } else {
1376                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1377                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1378        }
1379        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1380        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1381}
1382
1383static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1384                       struct mm_struct *mm)
1385{
1386        const struct cred *cred;
1387        unsigned int i, len;
1388        
1389        /* first copy the parameters from user space */
1390        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1391
1392        len = mm->arg_end - mm->arg_start;
1393        if (len >= ELF_PRARGSZ)
1394                len = ELF_PRARGSZ-1;
1395        if (copy_from_user(&psinfo->pr_psargs,
1396                           (const char __user *)mm->arg_start, len))
1397                return -EFAULT;
1398        for(i = 0; i < len; i++)
1399                if (psinfo->pr_psargs[i] == 0)
1400                        psinfo->pr_psargs[i] = ' ';
1401        psinfo->pr_psargs[len] = 0;
1402
1403        psinfo->pr_pid = task_pid_vnr(p);
1404        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1405        psinfo->pr_pgrp = task_pgrp_vnr(p);
1406        psinfo->pr_sid = task_session_vnr(p);
1407
1408        i = p->state ? ffz(~p->state) + 1 : 0;
1409        psinfo->pr_state = i;
1410        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1411        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1412        psinfo->pr_nice = task_nice(p);
1413        psinfo->pr_flag = p->flags;
1414        rcu_read_lock();
1415        cred = __task_cred(p);
1416        SET_UID(psinfo->pr_uid, cred->uid);
1417        SET_GID(psinfo->pr_gid, cred->gid);
1418        rcu_read_unlock();
1419        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1420        
1421        return 0;
1422}
1423
1424static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1425{
1426        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1427        int i = 0;
1428        do
1429                i += 2;
1430        while (auxv[i - 2] != AT_NULL);
1431        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1432}
1433
1434#ifdef CORE_DUMP_USE_REGSET
1435#include <linux/regset.h>
1436
1437struct elf_thread_core_info {
1438        struct elf_thread_core_info *next;
1439        struct task_struct *task;
1440        struct elf_prstatus prstatus;
1441        struct memelfnote notes[0];
1442};
1443
1444struct elf_note_info {
1445        struct elf_thread_core_info *thread;
1446        struct memelfnote psinfo;
1447        struct memelfnote auxv;
1448        size_t size;
1449        int thread_notes;
1450};
1451
1452/*
1453 * When a regset has a writeback hook, we call it on each thread before
1454 * dumping user memory.  On register window machines, this makes sure the
1455 * user memory backing the register data is up to date before we read it.
1456 */
1457static void do_thread_regset_writeback(struct task_struct *task,
1458                                       const struct user_regset *regset)
1459{
1460        if (regset->writeback)
1461                regset->writeback(task, regset, 1);
1462}
1463
1464static int fill_thread_core_info(struct elf_thread_core_info *t,
1465                                 const struct user_regset_view *view,
1466                                 long signr, size_t *total)
1467{
1468        unsigned int i;
1469
1470        /*
1471         * NT_PRSTATUS is the one special case, because the regset data
1472         * goes into the pr_reg field inside the note contents, rather
1473         * than being the whole note contents.  We fill the reset in here.
1474         * We assume that regset 0 is NT_PRSTATUS.
1475         */
1476        fill_prstatus(&t->prstatus, t->task, signr);
1477        (void) view->regsets[0].get(t->task, &view->regsets[0],
1478                                    0, sizeof(t->prstatus.pr_reg),
1479                                    &t->prstatus.pr_reg, NULL);
1480
1481        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1482                  sizeof(t->prstatus), &t->prstatus);
1483        *total += notesize(&t->notes[0]);
1484
1485        do_thread_regset_writeback(t->task, &view->regsets[0]);
1486
1487        /*
1488         * Each other regset might generate a note too.  For each regset
1489         * that has no core_note_type or is inactive, we leave t->notes[i]
1490         * all zero and we'll know to skip writing it later.
1491         */
1492        for (i = 1; i < view->n; ++i) {
1493                const struct user_regset *regset = &view->regsets[i];
1494                do_thread_regset_writeback(t->task, regset);
1495                if (regset->core_note_type &&
1496                    (!regset->active || regset->active(t->task, regset))) {
1497                        int ret;
1498                        size_t size = regset->n * regset->size;
1499                        void *data = kmalloc(size, GFP_KERNEL);
1500                        if (unlikely(!data))
1501                                return 0;
1502                        ret = regset->get(t->task, regset,
1503                                          0, size, data, NULL);
1504                        if (unlikely(ret))
1505                                kfree(data);
1506                        else {
1507                                if (regset->core_note_type != NT_PRFPREG)
1508                                        fill_note(&t->notes[i], "LINUX",
1509                                                  regset->core_note_type,
1510                                                  size, data);
1511                                else {
1512                                        t->prstatus.pr_fpvalid = 1;
1513                                        fill_note(&t->notes[i], "CORE",
1514                                                  NT_PRFPREG, size, data);
1515                                }
1516                                *total += notesize(&t->notes[i]);
1517                        }
1518                }
1519        }
1520
1521        return 1;
1522}
1523
1524static int fill_note_info(struct elfhdr *elf, int phdrs,
1525                          struct elf_note_info *info,
1526                          long signr, struct pt_regs *regs)
1527{
1528        struct task_struct *dump_task = current;
1529        const struct user_regset_view *view = task_user_regset_view(dump_task);
1530        struct elf_thread_core_info *t;
1531        struct elf_prpsinfo *psinfo;
1532        struct core_thread *ct;
1533        unsigned int i;
1534
1535        info->size = 0;
1536        info->thread = NULL;
1537
1538        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1539        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1540
1541        if (psinfo == NULL)
1542                return 0;
1543
1544        /*
1545         * Figure out how many notes we're going to need for each thread.
1546         */
1547        info->thread_notes = 0;
1548        for (i = 0; i < view->n; ++i)
1549                if (view->regsets[i].core_note_type != 0)
1550                        ++info->thread_notes;
1551
1552        /*
1553         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1554         * since it is our one special case.
1555         */
1556        if (unlikely(info->thread_notes == 0) ||
1557            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1558                WARN_ON(1);
1559                return 0;
1560        }
1561
1562        /*
1563         * Initialize the ELF file header.
1564         */
1565        fill_elf_header(elf, phdrs,
1566                        view->e_machine, view->e_flags, view->ei_osabi);
1567
1568        /*
1569         * Allocate a structure for each thread.
1570         */
1571        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1572                t = kzalloc(offsetof(struct elf_thread_core_info,
1573                                     notes[info->thread_notes]),
1574                            GFP_KERNEL);
1575                if (unlikely(!t))
1576                        return 0;
1577
1578                t->task = ct->task;
1579                if (ct->task == dump_task || !info->thread) {
1580                        t->next = info->thread;
1581                        info->thread = t;
1582                } else {
1583                        /*
1584                         * Make sure to keep the original task at
1585                         * the head of the list.
1586                         */
1587                        t->next = info->thread->next;
1588                        info->thread->next = t;
1589                }
1590        }
1591
1592        /*
1593         * Now fill in each thread's information.
1594         */
1595        for (t = info->thread; t != NULL; t = t->next)
1596                if (!fill_thread_core_info(t, view, signr, &info->size))
1597                        return 0;
1598
1599        /*
1600         * Fill in the two process-wide notes.
1601         */
1602        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1603        info->size += notesize(&info->psinfo);
1604
1605        fill_auxv_note(&info->auxv, current->mm);
1606        info->size += notesize(&info->auxv);
1607
1608        return 1;
1609}
1610
1611static size_t get_note_info_size(struct elf_note_info *info)
1612{
1613        return info->size;
1614}
1615
1616/*
1617 * Write all the notes for each thread.  When writing the first thread, the
1618 * process-wide notes are interleaved after the first thread-specific note.
1619 */
1620static int write_note_info(struct elf_note_info *info,
1621                           struct file *file, loff_t *foffset)
1622{
1623        bool first = 1;
1624        struct elf_thread_core_info *t = info->thread;
1625
1626        do {
1627                int i;
1628
1629                if (!writenote(&t->notes[0], file, foffset))
1630                        return 0;
1631
1632                if (first && !writenote(&info->psinfo, file, foffset))
1633                        return 0;
1634                if (first && !writenote(&info->auxv, file, foffset))
1635                        return 0;
1636
1637                for (i = 1; i < info->thread_notes; ++i)
1638                        if (t->notes[i].data &&
1639                            !writenote(&t->notes[i], file, foffset))
1640                                return 0;
1641
1642                first = 0;
1643                t = t->next;
1644        } while (t);
1645
1646        return 1;
1647}
1648
1649static void free_note_info(struct elf_note_info *info)
1650{
1651        struct elf_thread_core_info *threads = info->thread;
1652        while (threads) {
1653                unsigned int i;
1654                struct elf_thread_core_info *t = threads;
1655                threads = t->next;
1656                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1657                for (i = 1; i < info->thread_notes; ++i)
1658                        kfree(t->notes[i].data);
1659                kfree(t);
1660        }
1661        kfree(info->psinfo.data);
1662}
1663
1664#else
1665
1666/* Here is the structure in which status of each thread is captured. */
1667struct elf_thread_status
1668{
1669        struct list_head list;
1670        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1671        elf_fpregset_t fpu;             /* NT_PRFPREG */
1672        struct task_struct *thread;
1673#ifdef ELF_CORE_COPY_XFPREGS
1674        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1675#endif
1676        struct memelfnote notes[3];
1677        int num_notes;
1678};
1679
1680/*
1681 * In order to add the specific thread information for the elf file format,
1682 * we need to keep a linked list of every threads pr_status and then create
1683 * a single section for them in the final core file.
1684 */
1685static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1686{
1687        int sz = 0;
1688        struct task_struct *p = t->thread;
1689        t->num_notes = 0;
1690
1691        fill_prstatus(&t->prstatus, p, signr);
1692        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1693        
1694        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1695                  &(t->prstatus));
1696        t->num_notes++;
1697        sz += notesize(&t->notes[0]);
1698
1699        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1700                                                                &t->fpu))) {
1701                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1702                          &(t->fpu));
1703                t->num_notes++;
1704                sz += notesize(&t->notes[1]);
1705        }
1706
1707#ifdef ELF_CORE_COPY_XFPREGS
1708        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1709                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1710                          sizeof(t->xfpu), &t->xfpu);
1711                t->num_notes++;
1712                sz += notesize(&t->notes[2]);
1713        }
1714#endif  
1715        return sz;
1716}
1717
1718struct elf_note_info {
1719        struct memelfnote *notes;
1720        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1721        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1722        struct list_head thread_list;
1723        elf_fpregset_t *fpu;
1724#ifdef ELF_CORE_COPY_XFPREGS
1725        elf_fpxregset_t *xfpu;
1726#endif
1727        int thread_status_size;
1728        int numnote;
1729};
1730
1731static int fill_note_info(struct elfhdr *elf, int phdrs,
1732                          struct elf_note_info *info,
1733                          long signr, struct pt_regs *regs)
1734{
1735#define NUM_NOTES       6
1736        struct list_head *t;
1737
1738        info->notes = NULL;
1739        info->prstatus = NULL;
1740        info->psinfo = NULL;
1741        info->fpu = NULL;
1742#ifdef ELF_CORE_COPY_XFPREGS
1743        info->xfpu = NULL;
1744#endif
1745        INIT_LIST_HEAD(&info->thread_list);
1746
1747        info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1748                              GFP_KERNEL);
1749        if (!info->notes)
1750                return 0;
1751        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1752        if (!info->psinfo)
1753                return 0;
1754        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1755        if (!info->prstatus)
1756                return 0;
1757        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1758        if (!info->fpu)
1759                return 0;
1760#ifdef ELF_CORE_COPY_XFPREGS
1761        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1762        if (!info->xfpu)
1763                return 0;
1764#endif
1765
1766        info->thread_status_size = 0;
1767        if (signr) {
1768                struct core_thread *ct;
1769                struct elf_thread_status *ets;
1770
1771                for (ct = current->mm->core_state->dumper.next;
1772                                                ct; ct = ct->next) {
1773                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1774                        if (!ets)
1775                                return 0;
1776
1777                        ets->thread = ct->task;
1778                        list_add(&ets->list, &info->thread_list);
1779                }
1780
1781                list_for_each(t, &info->thread_list) {
1782                        int sz;
1783
1784                        ets = list_entry(t, struct elf_thread_status, list);
1785                        sz = elf_dump_thread_status(signr, ets);
1786                        info->thread_status_size += sz;
1787                }
1788        }
1789        /* now collect the dump for the current */
1790        memset(info->prstatus, 0, sizeof(*info->prstatus));
1791        fill_prstatus(info->prstatus, current, signr);
1792        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1793
1794        /* Set up header */
1795        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1796
1797        /*
1798         * Set up the notes in similar form to SVR4 core dumps made
1799         * with info from their /proc.
1800         */
1801
1802        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1803                  sizeof(*info->prstatus), info->prstatus);
1804        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1805        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1806                  sizeof(*info->psinfo), info->psinfo);
1807
1808        info->numnote = 2;
1809
1810        fill_auxv_note(&info->notes[info->numnote++], current->mm);
1811
1812        /* Try to dump the FPU. */
1813        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1814                                                               info->fpu);
1815        if (info->prstatus->pr_fpvalid)
1816                fill_note(info->notes + info->numnote++,
1817                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1818#ifdef ELF_CORE_COPY_XFPREGS
1819        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1820                fill_note(info->notes + info->numnote++,
1821                          "LINUX", ELF_CORE_XFPREG_TYPE,
1822                          sizeof(*info->xfpu), info->xfpu);
1823#endif
1824
1825        return 1;
1826
1827#undef NUM_NOTES
1828}
1829
1830static size_t get_note_info_size(struct elf_note_info *info)
1831{
1832        int sz = 0;
1833        int i;
1834
1835        for (i = 0; i < info->numnote; i++)
1836                sz += notesize(info->notes + i);
1837
1838        sz += info->thread_status_size;
1839
1840        return sz;
1841}
1842
1843static int write_note_info(struct elf_note_info *info,
1844                           struct file *file, loff_t *foffset)
1845{
1846        int i;
1847        struct list_head *t;
1848
1849        for (i = 0; i < info->numnote; i++)
1850                if (!writenote(info->notes + i, file, foffset))
1851                        return 0;
1852
1853        /* write out the thread status notes section */
1854        list_for_each(t, &info->thread_list) {
1855                struct elf_thread_status *tmp =
1856                                list_entry(t, struct elf_thread_status, list);
1857
1858                for (i = 0; i < tmp->num_notes; i++)
1859                        if (!writenote(&tmp->notes[i], file, foffset))
1860                                return 0;
1861        }
1862
1863        return 1;
1864}
1865
1866static void free_note_info(struct elf_note_info *info)
1867{
1868        while (!list_empty(&info->thread_list)) {
1869                struct list_head *tmp = info->thread_list.next;
1870                list_del(tmp);
1871                kfree(list_entry(tmp, struct elf_thread_status, list));
1872        }
1873
1874        kfree(info->prstatus);
1875        kfree(info->psinfo);
1876        kfree(info->notes);
1877        kfree(info->fpu);
1878#ifdef ELF_CORE_COPY_XFPREGS
1879        kfree(info->xfpu);
1880#endif
1881}
1882
1883#endif
1884
1885static struct vm_area_struct *first_vma(struct task_struct *tsk,
1886                                        struct vm_area_struct *gate_vma)
1887{
1888        struct vm_area_struct *ret = tsk->mm->mmap;
1889
1890        if (ret)
1891                return ret;
1892        return gate_vma;
1893}
1894/*
1895 * Helper function for iterating across a vma list.  It ensures that the caller
1896 * will visit `gate_vma' prior to terminating the search.
1897 */
1898static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1899                                        struct vm_area_struct *gate_vma)
1900{
1901        struct vm_area_struct *ret;
1902
1903        ret = this_vma->vm_next;
1904        if (ret)
1905                return ret;
1906        if (this_vma == gate_vma)
1907                return NULL;
1908        return gate_vma;
1909}
1910
1911/*
1912 * Actual dumper
1913 *
1914 * This is a two-pass process; first we find the offsets of the bits,
1915 * and then they are actually written out.  If we run out of core limit
1916 * we just truncate.
1917 */
1918static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1919{
1920        int has_dumped = 0;
1921        mm_segment_t fs;
1922        int segs;
1923        size_t size = 0;
1924        struct vm_area_struct *vma, *gate_vma;
1925        struct elfhdr *elf = NULL;
1926        loff_t offset = 0, dataoff, foffset;
1927        unsigned long mm_flags;
1928        struct elf_note_info info;
1929
1930        /*
1931         * We no longer stop all VM operations.
1932         * 
1933         * This is because those proceses that could possibly change map_count
1934         * or the mmap / vma pages are now blocked in do_exit on current
1935         * finishing this core dump.
1936         *
1937         * Only ptrace can touch these memory addresses, but it doesn't change
1938         * the map_count or the pages allocated. So no possibility of crashing
1939         * exists while dumping the mm->vm_next areas to the core file.
1940         */
1941  
1942        /* alloc memory for large data structures: too large to be on stack */
1943        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1944        if (!elf)
1945                goto out;
1946        
1947        segs = current->mm->map_count;
1948#ifdef ELF_CORE_EXTRA_PHDRS
1949        segs += ELF_CORE_EXTRA_PHDRS;
1950#endif
1951
1952        gate_vma = get_gate_vma(current);
1953        if (gate_vma != NULL)
1954                segs++;
1955
1956        /*
1957         * Collect all the non-memory information about the process for the
1958         * notes.  This also sets up the file header.
1959         */
1960        if (!fill_note_info(elf, segs + 1, /* including notes section */
1961                            &info, signr, regs))
1962                goto cleanup;
1963
1964        has_dumped = 1;
1965        current->flags |= PF_DUMPCORE;
1966  
1967        fs = get_fs();
1968        set_fs(KERNEL_DS);
1969
1970        DUMP_WRITE(elf, sizeof(*elf));
1971        offset += sizeof(*elf);                         /* Elf header */
1972        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1973        foffset = offset;
1974
1975        /* Write notes phdr entry */
1976        {
1977                struct elf_phdr phdr;
1978                size_t sz = get_note_info_size(&info);
1979
1980                sz += elf_coredump_extra_notes_size();
1981
1982                fill_elf_note_phdr(&phdr, sz, offset);
1983                offset += sz;
1984                DUMP_WRITE(&phdr, sizeof(phdr));
1985        }
1986
1987        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1988
1989        /*
1990         * We must use the same mm->flags while dumping core to avoid
1991         * inconsistency between the program headers and bodies, otherwise an
1992         * unusable core file can be generated.
1993         */
1994        mm_flags = current->mm->flags;
1995
1996        /* Write program headers for segments dump */
1997        for (vma = first_vma(current, gate_vma); vma != NULL;
1998                        vma = next_vma(vma, gate_vma)) {
1999                struct elf_phdr phdr;
2000
2001                phdr.p_type = PT_LOAD;
2002                phdr.p_offset = offset;
2003                phdr.p_vaddr = vma->vm_start;
2004                phdr.p_paddr = 0;
2005                phdr.p_filesz = vma_dump_size(vma, mm_flags);
2006                phdr.p_memsz = vma->vm_end - vma->vm_start;
2007                offset += phdr.p_filesz;
2008                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2009                if (vma->vm_flags & VM_WRITE)
2010                        phdr.p_flags |= PF_W;
2011                if (vma->vm_flags & VM_EXEC)
2012                        phdr.p_flags |= PF_X;
2013                phdr.p_align = ELF_EXEC_PAGESIZE;
2014
2015                DUMP_WRITE(&phdr, sizeof(phdr));
2016        }
2017
2018#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2019        ELF_CORE_WRITE_EXTRA_PHDRS;
2020#endif
2021
2022        /* write out the notes section */
2023        if (!write_note_info(&info, file, &foffset))
2024                goto end_coredump;
2025
2026        if (elf_coredump_extra_notes_write(file, &foffset))
2027                goto end_coredump;
2028
2029        /* Align to page */
2030        DUMP_SEEK(dataoff - foffset);
2031
2032        for (vma = first_vma(current, gate_vma); vma != NULL;
2033                        vma = next_vma(vma, gate_vma)) {
2034                unsigned long addr;
2035                unsigned long end;
2036
2037                end = vma->vm_start + vma_dump_size(vma, mm_flags);
2038
2039                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2040                        struct page *page;
2041                        struct vm_area_struct *tmp_vma;
2042
2043                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2044                                                &page, &tmp_vma) <= 0) {
2045                                DUMP_SEEK(PAGE_SIZE);
2046                        } else {
2047                                if (page == ZERO_PAGE(0)) {
2048                                        if (!dump_seek(file, PAGE_SIZE)) {
2049                                                page_cache_release(page);
2050                                                goto end_coredump;
2051                                        }
2052                                } else {
2053                                        void *kaddr;
2054                                        flush_cache_page(tmp_vma, addr,
2055                                                         page_to_pfn(page));
2056                                        kaddr = kmap(page);
2057                                        if ((size += PAGE_SIZE) > limit ||
2058                                            !dump_write(file, kaddr,
2059                                            PAGE_SIZE)) {
2060                                                kunmap(page);
2061                                                page_cache_release(page);
2062                                                goto end_coredump;
2063                                        }
2064                                        kunmap(page);
2065                                }
2066                                page_cache_release(page);
2067                        }
2068                }
2069        }
2070
2071#ifdef ELF_CORE_WRITE_EXTRA_DATA
2072        ELF_CORE_WRITE_EXTRA_DATA;
2073#endif
2074
2075end_coredump:
2076        set_fs(fs);
2077
2078cleanup:
2079        free_note_info(&info);
2080        kfree(elf);
2081out:
2082        return has_dumped;
2083}
2084
2085#endif          /* USE_ELF_CORE_DUMP */
2086
2087static int __init init_elf_binfmt(void)
2088{
2089        return register_binfmt(&elf_format);
2090}
2091
2092static void __exit exit_elf_binfmt(void)
2093{
2094        /* Remove the COFF and ELF loaders. */
2095        unregister_binfmt(&elf_format);
2096}
2097
2098core_initcall(init_elf_binfmt);
2099module_exit(exit_elf_binfmt);
2100MODULE_LICENSE("GPL");
2101
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.