linux/fs/binfmt_elf.c
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
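/*
 * Worked example, assuming ELF_MIN_ALIGN is 4096 (0x1000): for
 * _v = 0x12345, ELF_PAGESTART(_v) is 0x12000, ELF_PAGEOFFSET(_v)
 * is 0x345, and ELF_PAGEALIGN(_v) is 0x13000.
 */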

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
        .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

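/*
 * Extend the program break: map anonymous zero pages over the
 * page-aligned range [start, end) via do_brk() and record the new brk
 * in the mm.  Returns 0 on success, or the error value from do_brk().
 */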
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss).  These would otherwise contain junk from the
 * file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

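/*
 * Lay out the new process's initial stack: the platform capability
 * strings, the ELF auxiliary vector (also saved in mm->saved_auxv),
 * argc, the argv and envp pointer arrays, and finally the auxv itself
 * are all copied out to user space below bprm->p.
 */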
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        struct task_struct *tsk = current;
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, tsk->uid);
        NEW_AUX_ENT(AT_EUID, tsk->euid);
        NEW_AUX_ENT(AT_GID, tsk->gid);
        NEW_AUX_ENT(AT_EGID, tsk->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

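/*
 * Map a PT_LOAD segment of @filep at @addr with the given protection
 * and flags, page-aligning the request.  Returns the address chosen
 * by do_mmap(), or an error value that BAD_ADDR() will catch.
 */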
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image - and unmap the remainder at
         * the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

#endif /* !elf_map */

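/*
 * Size of the address range spanned by all PT_LOAD segments: from the
 * page containing the first segment's p_vaddr to the end of the last
 * segment's memory image.  Returns 0 if there is no PT_LOAD segment.
 */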
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

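/*
 * Apply stack randomization: perturb the page-aligned stack top by a
 * random, page-aligned amount bounded by STACK_RND_MASK, unless
 * randomization is disabled for this task.
 */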
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

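/*
 * Outline of load_elf_binary():
 *   1. Sanity-check the ELF header and read the program headers.
 *   2. If there is a PT_INTERP segment, open the interpreter and read
 *      its header.
 *   3. flush_old_exec() - the point of no return - then set up the
 *      personality, stack and mmap layout.
 *   4. Map each PT_LOAD segment, tracking code/data/bss boundaries.
 *   5. Map the bss/brk, load the interpreter (if any), build the
 *      stack tables and start the new thread.
 */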
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int elf_exec_fileno;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        retval = get_unused_fd();
        if (retval < 0)
                goto out_free_ph;
        get_file(bprm->file);
        fd_install(elf_exec_fileno = retval, bprm->file);

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_file;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_file;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        sys_close(elf_exec_fileno);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_file:
        sys_close(elf_exec_fileno);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n))
                                return 0;
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                if (get_user(word, header) == 0 && word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

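/*
 * On-disk size of a note: the elf_note header plus the name and the
 * descriptor data, each padded to a 4-byte boundary.  For example, a
 * "CORE" note (5 bytes including the NUL) with a 2-byte descriptor
 * takes sizeof(struct elf_note) + 8 + 4 bytes.
 */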
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

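/*
 * Write one note to the dump file: the elf_note header, then the
 * NUL-terminated name, then the descriptor data, padding the latter
 * two out to a 4-byte boundary.  Returns 1 on success, 0 on a short
 * write.
 */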
static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;

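/*
 * Initialize an ET_CORE ELF header for the dump file; the @segs
 * program headers are laid out immediately after it (e_phoff is
 * sizeof(struct elfhdr)).
 */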
static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

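/*
 * Build the PT_NOTE program header covering @sz bytes of note data at
 * file @offset; notes occupy no memory image, so p_memsz is 0.
 */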
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

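/*
 * Fill the NT_PRPSINFO note data: the command line (copied from user
 * space, with the NULs between arguments replaced by spaces), pids,
 * state, nice value, credentials and comm.
 */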
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ - 1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        SET_UID(psinfo->pr_uid, p->uid);
        SET_GID(psinfo->pr_gid, p->gid);
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

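/*
 * Build the NT_AUXV note from the auxv saved at exec time, including
 * the terminating AT_NULL pair.
 */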
1398static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1399{
1400        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1401        int i = 0;
1402        do
1403                i += 2;
1404        while (auxv[i - 2] != AT_NULL);
1405        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1406}
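
/*
 * Illustrative sketch (not from this file): the same AT_* key/value pairs
 * walked above are exported to userspace via /proc/<pid>/auxv, again
 * terminated by an AT_NULL pair.  Assumes elf_addr_t is unsigned long.
 */
#if 0
#include <elf.h>                /* AT_NULL */
#include <stdio.h>

int main(void)
{
        unsigned long pair[2];
        FILE *f = fopen("/proc/self/auxv", "r");

        if (!f)
                return 1;
        while (fread(pair, sizeof(pair[0]), 2, f) == 2 && pair[0] != AT_NULL)
                printf("type %lu value %#lx\n", pair[0], pair[1]);
        fclose(f);
        return 0;
}
#endif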

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
        struct elf_thread_core_info *next;
        struct task_struct *task;
        struct elf_prstatus prstatus;
        struct memelfnote notes[0];
};

struct elf_note_info {
        struct elf_thread_core_info *thread;
        struct memelfnote psinfo;
        struct memelfnote auxv;
        size_t size;
        int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
                                       const struct user_regset *regset)
{
        if (regset->writeback)
                regset->writeback(task, regset, 1);
}

static int fill_thread_core_info(struct elf_thread_core_info *t,
                                 const struct user_regset_view *view,
                                 long signr, size_t *total)
{
        unsigned int i;

        /*
         * NT_PRSTATUS is the one special case, because the regset data
         * goes into the pr_reg field inside the note contents, rather
         * than being the whole note contents.  We fill the rest in here.
         * We assume that regset 0 is NT_PRSTATUS.
         */
        fill_prstatus(&t->prstatus, t->task, signr);
        (void) view->regsets[0].get(t->task, &view->regsets[0],
                                    0, sizeof(t->prstatus.pr_reg),
                                    &t->prstatus.pr_reg, NULL);

        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
                  sizeof(t->prstatus), &t->prstatus);
        *total += notesize(&t->notes[0]);

        do_thread_regset_writeback(t->task, &view->regsets[0]);

        /*
         * Each other regset might generate a note too.  For each regset
         * that has no core_note_type or is inactive, we leave t->notes[i]
         * all zero and we'll know to skip writing it later.
         */
        for (i = 1; i < view->n; ++i) {
                const struct user_regset *regset = &view->regsets[i];
                do_thread_regset_writeback(t->task, regset);
                if (regset->core_note_type &&
                    (!regset->active || regset->active(t->task, regset))) {
                        int ret;
                        size_t size = regset->n * regset->size;
                        void *data = kmalloc(size, GFP_KERNEL);
                        if (unlikely(!data))
                                return 0;
                        ret = regset->get(t->task, regset,
                                          0, size, data, NULL);
                        if (unlikely(ret))
                                kfree(data);
                        else {
                                if (regset->core_note_type != NT_PRFPREG)
                                        fill_note(&t->notes[i], "LINUX",
                                                  regset->core_note_type,
                                                  size, data);
                                else {
                                        t->prstatus.pr_fpvalid = 1;
                                        fill_note(&t->notes[i], "CORE",
                                                  NT_PRFPREG, size, data);
                                }
                                *total += notesize(&t->notes[i]);
                        }
                }
        }

        return 1;
}
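
/*
 * Illustrative sketch (not from this file): fill_thread_core_info() consumes
 * the architecture's array of struct user_regset.  A hypothetical arch-side
 * entry of the shape this code expects might look like the following, where
 * gpr_get/gpr_set stand in for arch-provided accessors:
 */
#if 0
static const struct user_regset hypothetical_regsets[] = {
        [0] = {                 /* regset 0 must be NT_PRSTATUS, see above */
                .core_note_type = NT_PRSTATUS,
                .n = sizeof(struct user_regs_struct) / sizeof(long),
                .size = sizeof(long),
                .align = sizeof(long),
                .get = gpr_get,
                .set = gpr_set,
        },
};
#endif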

static int fill_note_info(struct elfhdr *elf, int phdrs,
                          struct elf_note_info *info,
                          long signr, struct pt_regs *regs)
{
        struct task_struct *dump_task = current;
        const struct user_regset_view *view = task_user_regset_view(dump_task);
        struct elf_thread_core_info *t;
        struct elf_prpsinfo *psinfo;
        struct core_thread *ct;
        unsigned int i;

        info->size = 0;
        info->thread = NULL;

        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

        if (psinfo == NULL)
                return 0;

        /*
         * Figure out how many notes we're going to need for each thread.
         */
        info->thread_notes = 0;
        for (i = 0; i < view->n; ++i)
                if (view->regsets[i].core_note_type != 0)
                        ++info->thread_notes;

        /*
         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
         * since it is our one special case.
         */
        if (unlikely(info->thread_notes == 0) ||
            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * Initialize the ELF file header.
         */
        fill_elf_header(elf, phdrs,
                        view->e_machine, view->e_flags, view->ei_osabi);

        /*
         * Allocate a structure for each thread.
         */
        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
                t = kzalloc(offsetof(struct elf_thread_core_info,
                                     notes[info->thread_notes]),
                            GFP_KERNEL);
                if (unlikely(!t))
                        return 0;

                t->task = ct->task;
                if (ct->task == dump_task || !info->thread) {
                        t->next = info->thread;
                        info->thread = t;
                } else {
                        /*
                         * Make sure to keep the original task at
                         * the head of the list.
                         */
                        t->next = info->thread->next;
                        info->thread->next = t;
                }
        }

        /*
         * Now fill in each thread's information.
         */
        for (t = info->thread; t != NULL; t = t->next)
                if (!fill_thread_core_info(t, view, signr, &info->size))
                        return 0;

        /*
         * Fill in the two process-wide notes.
         */
        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
        info->size += notesize(&info->psinfo);

        fill_auxv_note(&info->auxv, current->mm);
        info->size += notesize(&info->auxv);

        return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
        return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
                           struct file *file, loff_t *foffset)
{
        bool first = true;
        struct elf_thread_core_info *t = info->thread;

        do {
                int i;

                if (!writenote(&t->notes[0], file, foffset))
                        return 0;

                if (first && !writenote(&info->psinfo, file, foffset))
                        return 0;
                if (first && !writenote(&info->auxv, file, foffset))
                        return 0;

                for (i = 1; i < info->thread_notes; ++i)
                        if (t->notes[i].data &&
                            !writenote(&t->notes[i], file, foffset))
                                return 0;

                first = false;
                t = t->next;
        } while (t);

        return 1;
}
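
/*
 * Illustrative sketch (not from this file): the interleaving above fixes the
 * order in which a reader encounters the notes.  A hypothetical userspace
 * lister for a 64-bit core file (error handling kept minimal):
 */
#if 0
#include <elf.h>
#include <stdio.h>

#define ROUND4(x) (((x) + 3) & ~3UL)

int main(int argc, char **argv)
{
        Elf64_Ehdr eh;
        Elf64_Phdr ph = { .p_type = PT_NULL };
        unsigned long off, end;
        int i;
        FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;

        if (!f || fread(&eh, sizeof(eh), 1, f) != 1)
                return 1;
        for (i = 0; i < eh.e_phnum; i++) {      /* find the PT_NOTE segment */
                fseek(f, eh.e_phoff + i * sizeof(ph), SEEK_SET);
                if (fread(&ph, sizeof(ph), 1, f) == 1 && ph.p_type == PT_NOTE)
                        break;
        }
        if (ph.p_type != PT_NOTE)
                return 1;
        for (off = ph.p_offset, end = off + ph.p_filesz; off < end;) {
                Elf64_Nhdr nh;
                char name[16] = "";

                fseek(f, off, SEEK_SET);
                if (fread(&nh, sizeof(nh), 1, f) != 1)
                        break;
                fread(name, 1, nh.n_namesz < sizeof(name) ?
                               nh.n_namesz : sizeof(name) - 1, f);
                printf("%-8s type %u, %u bytes\n", name, nh.n_type, nh.n_descsz);
                off += sizeof(nh) + ROUND4(nh.n_namesz) + ROUND4(nh.n_descsz);
        }
        return 0;
}
#endif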

static void free_note_info(struct elf_note_info *info)
{
        struct elf_thread_core_info *threads = info->thread;
        while (threads) {
                unsigned int i;
                struct elf_thread_core_info *t = threads;
                threads = t->next;
                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
                for (i = 1; i < info->thread_notes; ++i)
                        kfree(t->notes[i].data);
                kfree(t);
        }
        kfree(info->psinfo.data);
}

#else

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
        struct list_head list;
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];
        int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
        int sz = 0;
        struct task_struct *p = t->thread;
        t->num_notes = 0;

        fill_prstatus(&t->prstatus, p, signr);
        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
                  &(t->prstatus));
        t->num_notes++;
        sz += notesize(&t->notes[0]);

        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
                                                                &t->fpu))) {
                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
                          &(t->fpu));
                t->num_notes++;
                sz += notesize(&t->notes[1]);
        }

#ifdef ELF_CORE_COPY_XFPREGS
        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
                          sizeof(t->xfpu), &t->xfpu);
                t->num_notes++;
                sz += notesize(&t->notes[2]);
        }
#endif
        return sz;
}

struct elf_note_info {
        struct memelfnote *notes;
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;
        elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;
#endif
        int thread_status_size;
        int numnote;
};

static int fill_note_info(struct elfhdr *elf, int phdrs,
                          struct elf_note_info *info,
                          long signr, struct pt_regs *regs)
{
#define NUM_NOTES       6
        struct list_head *t;

        info->notes = NULL;
        info->prstatus = NULL;
        info->psinfo = NULL;
        info->fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
        info->xfpu = NULL;
#endif
        INIT_LIST_HEAD(&info->thread_list);

        info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
                              GFP_KERNEL);
        if (!info->notes)
                return 0;
        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
        if (!info->psinfo)
                return 0;
        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
        if (!info->prstatus)
                return 0;
        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
        if (!info->fpu)
                return 0;
#ifdef ELF_CORE_COPY_XFPREGS
        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
        if (!info->xfpu)
                return 0;
#endif

        info->thread_status_size = 0;
        if (signr) {
                struct core_thread *ct;
                struct elf_thread_status *ets;

                for (ct = current->mm->core_state->dumper.next;
                                                ct; ct = ct->next) {
                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
                        if (!ets)
                                return 0;

                        ets->thread = ct->task;
                        list_add(&ets->list, &info->thread_list);
                }

                list_for_each(t, &info->thread_list) {
                        int sz;

                        ets = list_entry(t, struct elf_thread_status, list);
                        sz = elf_dump_thread_status(signr, ets);
                        info->thread_status_size += sz;
                }
        }
        /* now collect the dump for the current task */
        memset(info->prstatus, 0, sizeof(*info->prstatus));
        fill_prstatus(info->prstatus, current, signr);
        elf_core_copy_regs(&info->prstatus->pr_reg, regs);

        /* Set up the ELF file header. */
        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */

        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
                  sizeof(*info->prstatus), info->prstatus);
        fill_psinfo(info->psinfo, current->group_leader, current->mm);
        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
                  sizeof(*info->psinfo), info->psinfo);

        info->numnote = 2;

        fill_auxv_note(&info->notes[info->numnote++], current->mm);

        /* Try to dump the FPU. */
        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
                                                               info->fpu);
        if (info->prstatus->pr_fpvalid)
                fill_note(info->notes + info->numnote++,
                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
        if (elf_core_copy_task_xfpregs(current, info->xfpu))
                fill_note(info->notes + info->numnote++,
                          "LINUX", ELF_CORE_XFPREG_TYPE,
                          sizeof(*info->xfpu), info->xfpu);
#endif

        return 1;

#undef NUM_NOTES
}

static size_t get_note_info_size(struct elf_note_info *info)
{
        size_t sz = 0;
        int i;

        for (i = 0; i < info->numnote; i++)
                sz += notesize(info->notes + i);

        sz += info->thread_status_size;

        return sz;
}

static int write_note_info(struct elf_note_info *info,
                           struct file *file, loff_t *foffset)
{
        int i;
        struct list_head *t;

        for (i = 0; i < info->numnote; i++)
                if (!writenote(info->notes + i, file, foffset))
                        return 0;

        /* write out the thread status notes section */
        list_for_each(t, &info->thread_list) {
                struct elf_thread_status *tmp =
                                list_entry(t, struct elf_thread_status, list);

                for (i = 0; i < tmp->num_notes; i++)
                        if (!writenote(&tmp->notes[i], file, foffset))
                                return 0;
        }

        return 1;
}

static void free_note_info(struct elf_note_info *info)
{
        while (!list_empty(&info->thread_list)) {
                struct list_head *tmp = info->thread_list.next;
                list_del(tmp);
                kfree(list_entry(tmp, struct elf_thread_status, list));
        }

        kfree(info->prstatus);
        kfree(info->psinfo);
        kfree(info->notes);
        kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
        kfree(info->xfpu);
#endif
}

#endif

static struct vm_area_struct *first_vma(struct task_struct *tsk,
                                        struct vm_area_struct *gate_vma)
{
        struct vm_area_struct *ret = tsk->mm->mmap;

        if (ret)
                return ret;
        return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the
 * caller will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
                                        struct vm_area_struct *gate_vma)
{
        struct vm_area_struct *ret;

        ret = this_vma->vm_next;
        if (ret)
                return ret;
        if (this_vma == gate_vma)
                return NULL;
        return gate_vma;
}

/*
 * Actual dumper.
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
        unsigned long mm_flags;
        struct elf_note_info info;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;

        segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
        segs += ELF_CORE_EXTRA_PHDRS;
#endif

        gate_vma = get_gate_vma(current);
        if (gate_vma != NULL)
                segs++;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, segs + 1, /* including notes section */
                            &info, signr, regs))
                goto cleanup;

        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        fs = get_fs();
        set_fs(KERNEL_DS);

        DUMP_WRITE(elf, sizeof(*elf));
        offset += sizeof(*elf);                         /* Elf header */
        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                struct elf_phdr phdr;
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                fill_elf_note_phdr(&phdr, sz, offset);
                offset += sz;
                DUMP_WRITE(&phdr, sizeof(phdr));
        }

        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /*
         * We must use the same mm->flags while dumping core to avoid
         * inconsistency between the program headers and bodies, otherwise an
         * unusable core file can be generated.
         */
        mm_flags = current->mm->flags;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                phdr.p_filesz = vma_dump_size(vma, mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                DUMP_WRITE(&phdr, sizeof(phdr));
        }

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
        ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

        /* write out the notes section */
        if (!write_note_info(&info, file, &foffset))
                goto end_coredump;

        if (elf_coredump_extra_notes_write(file, &foffset))
                goto end_coredump;

        /* Align to page */
        DUMP_SEEK(dataoff - foffset);

        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        struct vm_area_struct *tmp_vma;

                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
                                                &page, &tmp_vma) <= 0) {
                                DUMP_SEEK(PAGE_SIZE);
                        } else {
                                if (page == ZERO_PAGE(0)) {
                                        if (!dump_seek(file, PAGE_SIZE)) {
                                                page_cache_release(page);
                                                goto end_coredump;
                                        }
                                } else {
                                        void *kaddr;
                                        flush_cache_page(tmp_vma, addr,
                                                         page_to_pfn(page));
                                        kaddr = kmap(page);
                                        if ((size += PAGE_SIZE) > limit ||
                                            !dump_write(file, kaddr,
                                            PAGE_SIZE)) {
                                                kunmap(page);
                                                page_cache_release(page);
                                                goto end_coredump;
                                        }
                                        kunmap(page);
                                }
                                page_cache_release(page);
                        }
                }
        }

#ifdef ELF_CORE_WRITE_EXTRA_DATA
        ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
        set_fs(fs);

cleanup:
        free_note_info(&info);
        kfree(elf);
out:
        return has_dumped;
}
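
/*
 * Illustrative sketch (not from this file): the first pass above lays the
 * core file out as ELF header, then (segs + 1) program headers, then the
 * note segment, then the page-aligned PT_LOAD bodies.  The hypothetical
 * snippet below redoes that offset arithmetic for made-up 64-bit sizes:
 */
#if 0
#include <elf.h>
#include <stdio.h>

int main(void)
{
        unsigned long segs = 10, notesz = 4096;         /* made-up sizes */
        unsigned long page = 4096;                      /* ELF_EXEC_PAGESIZE */
        unsigned long offset = 0;

        offset += sizeof(Elf64_Ehdr);                   /* ELF header */
        offset += (segs + 1) * sizeof(Elf64_Phdr);      /* program headers */
        printf("notes start at    %#lx\n", offset);     /* foffset */
        offset += notesz;
        offset = (offset + page - 1) & ~(page - 1);     /* roundup() */
        printf("segments start at %#lx\n", offset);     /* dataoff */
        return 0;
}
#endif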

#endif          /* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
        return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");