linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/errno.h>
  20#include <linux/signal.h>
  21#include <linux/binfmts.h>
  22#include <linux/string.h>
  23#include <linux/file.h>
  24#include <linux/fcntl.h>
  25#include <linux/ptrace.h>
  26#include <linux/slab.h>
  27#include <linux/shm.h>
  28#include <linux/personality.h>
  29#include <linux/elfcore.h>
  30#include <linux/init.h>
  31#include <linux/highuid.h>
  32#include <linux/smp.h>
  33#include <linux/compiler.h>
  34#include <linux/highmem.h>
  35#include <linux/pagemap.h>
  36#include <linux/security.h>
  37#include <linux/syscalls.h>
  38#include <linux/random.h>
  39#include <linux/elf.h>
  40#include <linux/utsname.h>
  41#include <asm/uaccess.h>
  42#include <asm/param.h>
  43#include <asm/page.h>
  44
  /*
   * Forward declarations.  load_elf_binary() and load_elf_library() are
   * referenced by the elf_format initialiser further down; elf_map() is
   * defined later in this file unless an elf_map macro already exists.
   */
  45static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
  46static int load_elf_library(struct file *);
  47static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  48                                int, int, unsigned long);
  49
  50/*
  51 * If we don't support core dumping, then supply a NULL so we
  52 * don't even try.
   *
   * Core dumping is compiled in only when both USE_ELF_CORE_DUMP and
   * CONFIG_ELF_CORE are defined.  Either way, the name elf_core_dump is
   * what the .core_dump member of elf_format is initialised with.
  53 */
  54#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
  55static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
  56#else
  57#define elf_core_dump   NULL
  58#endif
  59
  /*
   * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE.  It is
   * the granule used by the ELF_PAGESTART/ELF_PAGEOFFSET/ELF_PAGEALIGN
   * helpers in this file.
   */
  60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
  61#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
  62#else
  63#define ELF_MIN_ALIGN   PAGE_SIZE
  64#endif
  65
  /*
   * Fall back to 0 when nothing has defined ELF_CORE_EFLAGS before this
   * point.  NOTE(review): its consumer is outside this view; presumably
   * the core-dump ELF header's e_flags - confirm.
   */
  66#ifndef ELF_CORE_EFLAGS
  67#define ELF_CORE_EFLAGS 0
  68#endif
  69
  /*
   * Page arithmetic on ELF_MIN_ALIGN boundaries.
   *
   *   ELF_PAGESTART(v)  - round v down to the start of its page
   *   ELF_PAGEOFFSET(v) - offset of v within its page
   *   ELF_PAGEALIGN(v)  - round v up to the next page boundary
   *
   * Both rounding masks are widened to unsigned long before inversion so
   * that a 32-bit unsigned ELF_MIN_ALIGN cannot clear the upper bits of a
   * 64-bit address.
   */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN - 1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
  73
  /*
   * Binary-format descriptor for ELF.  It wires the loader entry points
   * declared above (load_elf_binary, load_elf_library) and the core-dump
   * hook (elf_core_dump, which may be NULL) into a struct linux_binfmt.
   * NOTE(review): where this descriptor is registered is outside this view.
   */
  74static struct linux_binfmt elf_format = {
  75                .module         = THIS_MODULE,
  76                .load_binary    = load_elf_binary,
  77                .load_shlib     = load_elf_library,
  78                .core_dump      = elf_core_dump,
  79                .min_coredump   = ELF_EXEC_PAGESIZE,
  80                .hasvdso        = 1
  81};
  82
  /*
   * True when x, viewed as an unsigned long, is at or above TASK_SIZE.
   * The functions in this file apply it to the results of do_brk(),
   * do_mmap() and elf_map() to detect failure; presumably those return a
   * negative errno cast to unsigned long, which lands above TASK_SIZE.
   */
  83#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  84
  85static int set_brk(unsigned long start, unsigned long end)
  86{
  87        start = ELF_PAGEALIGN(start);
  88        end = ELF_PAGEALIGN(end);
  89        if (end > start) {
  90                unsigned long addr;
  91                down_write(&current->mm->mmap_sem);
  92                addr = do_brk(start, end - start);
  93                up_write(&current->mm->mmap_sem);
  94                if (BAD_ADDR(addr))
  95                        return addr;
  96        }
  97        current->mm->start_brk = current->mm->brk = end;
  98        return 0;
  99}
 100
 101/* We need to explicitly zero any fractional pages
 102   after the data section (i.e. bss).  This would
 103   contain the junk from the file that should not
 104   be in memory
 105 */
 106static int padzero(unsigned long elf_bss)
 107{
 108        unsigned long nbyte;
 109
 110        nbyte = ELF_PAGEOFFSET(elf_bss);
 111        if (nbyte) {
 112                nbyte = ELF_MIN_ALIGN - nbyte;
 113                if (clear_user((void __user *) elf_bss, nbyte))
 114                        return -EFAULT;
 115        }
 116        return 0;
 117}
 118
 /*
  * Stack manipulation helpers, so create_elf_tables() reads the same
  * whichever way the stack grows.
  *
  *   STACK_ADD(sp, items)   - address @items elf_addr_t slots further into
  *                            the stack than @sp
  *   STACK_ROUND(sp, items) - the same position, rounded to a 16-byte
  *                            boundary in the direction of stack growth
  *   STACK_ALLOC(sp, len)   - reserve @len bytes by moving @sp (which is
  *                            modified in place) and yield the lowest
  *                            address of the reserved area
  *
  * Every macro argument is parenthesised so that compound expressions
  * such as "a + b" expand with the intended precedence.
  */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
 133
 134#ifndef ELF_BASE_PLATFORM
 135/*
 136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 138 * will be copied to the user stack in the same manner as AT_PLATFORM.
  *
  * With this NULL default, create_elf_tables() neither copies a string
  * nor emits an AT_BASE_PLATFORM auxv entry.
 139 */
 140#define ELF_BASE_PLATFORM NULL
 141#endif
 142
 /*
  * create_elf_tables() - lay out the initial user stack for a new image.
  * @bprm:             exec state; bprm->p is the current stack position on
  *                    entry and is updated to the final, 16-byte rounded
  *                    position
  * @exec:             ELF header of the executable (e_phoff, e_phnum and
  *                    e_entry feed the auxiliary vector)
  * @load_addr:        added to e_phoff to form AT_PHDR
  * @interp_load_addr: reported as AT_BASE
  *
  * In order, this copies the platform strings onto the stack, builds the
  * auxiliary vector in current->mm->saved_auxv, reserves room for
  * argc/argv/envp/auxv, writes argc and the argv/envp pointer arrays
  * (each NULL-terminated), records arg_start..env_end in the mm, and
  * finally copies the auxiliary vector just above envp's terminator.
  *
  * Returns 0 on success; -EFAULT if a user-space write fails or the stack
  * vma cannot be found/extended; -EINVAL if an argument or environment
  * string has length 0 or exceeds MAX_ARG_STRLEN.
  */
 143static int
 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 145                unsigned long load_addr, unsigned long interp_load_addr)
 146{
 147        unsigned long p = bprm->p;
 148        int argc = bprm->argc;
 149        int envc = bprm->envc;
 150        elf_addr_t __user *argv;
 151        elf_addr_t __user *envp;
 152        elf_addr_t __user *sp;
 153        elf_addr_t __user *u_platform;
 154        elf_addr_t __user *u_base_platform;
 155        const char *k_platform = ELF_PLATFORM;
 156        const char *k_base_platform = ELF_BASE_PLATFORM;
 157        int items;
 158        elf_addr_t *elf_info;
 159        int ei_index = 0;
 160        struct task_struct *tsk = current;
 161        struct vm_area_struct *vma;
 162
 163        /*
 164         * In some cases (e.g. Hyper-Threading), we want to avoid L1
 165         * evictions by the processes running on the same package. One
 166         * thing we can do is to shuffle the initial stack for them.
 167         */
 168
 169        p = arch_align_stack(p);
 170
 171        /*
 172         * If this architecture has a platform capability string, copy it
 173         * to userspace.  In some cases (Sparc), this info is impossible
 174         * for userspace to get any other way, in others (i386) it is
 175         * merely difficult.
 176         */
 177        u_platform = NULL;
 178        if (k_platform) {
 179                size_t len = strlen(k_platform) + 1;
 180
 181                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 182                if (__copy_to_user(u_platform, k_platform, len))
 183                        return -EFAULT;
 184        }
 185
 186        /*
 187         * If this architecture has a "base" platform capability
 188         * string, copy it to userspace.
 189         */
 190        u_base_platform = NULL;
 191        if (k_base_platform) {
 192                size_t len = strlen(k_base_platform) + 1;
 193
 194                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 195                if (__copy_to_user(u_base_platform, k_base_platform, len))
 196                        return -EFAULT;
 197        }
 198
 199        /* Create the ELF interpreter info */
 200        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 201        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /* Each entry is an (id, value) pair, so ei_index advances by two. */
 202#define NEW_AUX_ENT(id, val) \
 203        do { \
 204                elf_info[ei_index++] = id; \
 205                elf_info[ei_index++] = val; \
 206        } while (0)
 207
 208#ifdef ARCH_DLINFO
 209        /* 
 210         * ARCH_DLINFO must come first so PPC can do its special alignment of
 211         * AUXV.
 212         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
 213         * ARCH_DLINFO changes
 214         */
 215        ARCH_DLINFO;
 216#endif
 217        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 218        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 219        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 220        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 221        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 222        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 223        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 224        NEW_AUX_ENT(AT_FLAGS, 0);
 225        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 226        NEW_AUX_ENT(AT_UID, tsk->uid);
 227        NEW_AUX_ENT(AT_EUID, tsk->euid);
 228        NEW_AUX_ENT(AT_GID, tsk->gid);
 229        NEW_AUX_ENT(AT_EGID, tsk->egid);
 230        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 231        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 232        if (k_platform) {
 233                NEW_AUX_ENT(AT_PLATFORM,
 234                            (elf_addr_t)(unsigned long)u_platform);
 235        }
 236        if (k_base_platform) {
 237                NEW_AUX_ENT(AT_BASE_PLATFORM,
 238                            (elf_addr_t)(unsigned long)u_base_platform);
 239        }
 240        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 241                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 242        }
 243#undef NEW_AUX_ENT
 244        /* AT_NULL is zero; clear the rest too */
 245        memset(&elf_info[ei_index], 0,
 246               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 247
 248        /* And advance past the AT_NULL entry.  */
 249        ei_index += 2;
 250
 251        sp = STACK_ADD(p, ei_index);
 252
        /* argv pointers + NULL, envp pointers + NULL, plus one slot for argc */
 253        items = (argc + 1) + (envc + 1) + 1;
 254        bprm->p = STACK_ROUND(sp, items);
 255
 256        /* Point sp at the lowest address on the stack */
 257#ifdef CONFIG_STACK_GROWSUP
 258        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 259        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 260#else
 261        sp = (elf_addr_t __user *)bprm->p;
 262#endif
 263
 264
 265        /*
 266         * Grow the stack manually; some architectures have a limit on how
 267         * far ahead a user-space access may be in order to grow the stack.
 268         */
 269        vma = find_extend_vma(current->mm, bprm->p);
 270        if (!vma)
 271                return -EFAULT;
 272
 273        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 274        if (__put_user(argc, sp++))
 275                return -EFAULT;
 276        argv = sp;
 277        envp = argv + argc + 1;
 278
 279        /* Populate argv and envp */
        /*
         * From here on p no longer tracks the stack position: it walks the
         * argument and environment strings, starting at mm->arg_start, one
         * string (strnlen_user() length, terminator included) at a time.
         */
 280        p = current->mm->arg_end = current->mm->arg_start;
 281        while (argc-- > 0) {
 282                size_t len;
 283                if (__put_user((elf_addr_t)p, argv++))
 284                        return -EFAULT;
 285                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 286                if (!len || len > MAX_ARG_STRLEN)
 287                        return -EINVAL;
 288                p += len;
 289        }
 290        if (__put_user(0, argv))
 291                return -EFAULT;
 292        current->mm->arg_end = current->mm->env_start = p;
 293        while (envc-- > 0) {
 294                size_t len;
 295                if (__put_user((elf_addr_t)p, envp++))
 296                        return -EFAULT;
 297                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 298                if (!len || len > MAX_ARG_STRLEN)
 299                        return -EINVAL;
 300                p += len;
 301        }
 302        if (__put_user(0, envp))
 303                return -EFAULT;
 304        current->mm->env_end = p;
 305
 306        /* Put the elf_info on the stack in the right place.  */
 307        sp = (elf_addr_t __user *)envp + 1;
 308        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 309                return -EFAULT;
 310        return 0;
 311}
 312
 313#ifndef elf_map
 314
 315static unsigned long elf_map(struct file *filep, unsigned long addr,
 316                struct elf_phdr *eppnt, int prot, int type,
 317                unsigned long total_size)
 318{
 319        unsigned long map_addr;
 320        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 321        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
 322        addr = ELF_PAGESTART(addr);
 323        size = ELF_PAGEALIGN(size);
 324
 325        /* mmap() will return -EINVAL if given a zero size, but a
 326         * segment with zero filesize is perfectly valid */
 327        if (!size)
 328                return addr;
 329
 330        down_write(&current->mm->mmap_sem);
 331        /*
 332        * total_size is the size of the ELF (interpreter) image.
 333        * The _first_ mmap needs to know the full size, otherwise
 334        * randomization might put this image into an overlapping
 335        * position with the ELF binary image. (since size < total_size)
 336        * So we first map the 'big' image - and unmap the remainder at
 337        * the end. (which unmap is needed for ELF images with holes.)
 338        */
 339        if (total_size) {
 340                total_size = ELF_PAGEALIGN(total_size);
 341                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
 342                if (!BAD_ADDR(map_addr))
 343                        do_munmap(current->mm, map_addr+size, total_size-size);
 344        } else
 345                map_addr = do_mmap(filep, addr, size, prot, type, off);
 346
 347        up_write(&current->mm->mmap_sem);
 348        return(map_addr);
 349}
 350
 351#endif /* !elf_map */
 352
 353static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 354{
 355        int i, first_idx = -1, last_idx = -1;
 356
 357        for (i = 0; i < nr; i++) {
 358                if (cmds[i].p_type == PT_LOAD) {
 359                        last_idx = i;
 360                        if (first_idx == -1)
 361                                first_idx = i;
 362                }
 363        }
 364        if (first_idx == -1)
 365                return 0;
 366
 367        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 368                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
 369}
 370
 371
 372/* This is much more generalized than the library routine read function,
 373   so we keep this separate.  Technically the library read function
 374   is only provided so that we can read a.out libraries that have
 375   an ELF header */
 376
 377static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 378                struct file *interpreter, unsigned long *interp_map_addr,
 379                unsigned long no_base)
 380{
 381        struct elf_phdr *elf_phdata;
 382        struct elf_phdr *eppnt;
 383        unsigned long load_addr = 0;
 384        int load_addr_set = 0;
 385        unsigned long last_bss = 0, elf_bss = 0;
 386        unsigned long error = ~0UL;
 387        unsigned long total_size;
 388        int retval, i, size;
 389
 390        /* First of all, some simple consistency checks */
 391        if (interp_elf_ex->e_type != ET_EXEC &&
 392            interp_elf_ex->e_type != ET_DYN)
 393                goto out;
 394        if (!elf_check_arch(interp_elf_ex))
 395                goto out;
 396        if (!interpreter->f_op || !interpreter->f_op->mmap)
 397                goto out;
 398
 399        /*
 400         * If the size of this structure has changed, then punt, since
 401         * we will be doing the wrong thing.
 402         */
 403        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
 404                goto out;
 405        if (interp_elf_ex->e_phnum < 1 ||
 406                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
 407                goto out;
 408
 409        /* Now read in all of the header information */
 410        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 411        if (size > ELF_MIN_ALIGN)
 412                goto out;
 413        elf_phdata = kmalloc(size, GFP_KERNEL);
 414        if (!elf_phdata)
 415                goto out;
 416
 417        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
 418                             (char *)elf_phdata,size);
 419        error = -EIO;
 420        if (retval != size) {
 421                if (retval < 0)
 422                        error = retval; 
 423                goto out_close;
 424        }
 425
 426        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
 427        if (!total_size) {
 428                error = -EINVAL;
 429                goto out_close;
 430        }
 431
 432        eppnt = elf_phdata;
 433        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 434                if (eppnt->p_type == PT_LOAD) {
 435                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 436                        int elf_prot = 0;
 437                        unsigned long vaddr = 0;
 438                        unsigned long k, map_addr;
 439
 440                        if (eppnt->p_flags & PF_R)
 441                                elf_prot = PROT_READ;
 442                        if (eppnt->p_flags & PF_W)
 443                                elf_prot |= PROT_WRITE;
 444                        if (eppnt->p_flags & PF_X)
 445                                elf_prot |= PROT_EXEC;
 446                        vaddr = eppnt->p_vaddr;
 447                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 448                                elf_type |= MAP_FIXED;
 449                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
 450                                load_addr = -vaddr;
 451
 452                        map_addr = elf_map(interpreter, load_addr + vaddr,
 453                                        eppnt, elf_prot, elf_type, total_size);
 454                        total_size = 0;
 455                        if (!*interp_map_addr)
 456                                *interp_map_addr = map_addr;
 457                        error = map_addr;
 458                        if (BAD_ADDR(map_addr))
 459                                goto out_close;
 460
 461                        if (!load_addr_set &&
 462                            interp_elf_ex->e_type == ET_DYN) {
 463                                load_addr = map_addr - ELF_PAGESTART(vaddr);
 464                                load_addr_set = 1;
 465                        }
 466
 467                        /*
 468                         * Check to see if the section's size will overflow the
 469                         * allowed task size. Note that p_filesz must always be
 470                         * <= p_memsize so it's only necessary to check p_memsz.
 471                         */
 472                        k = load_addr + eppnt->p_vaddr;
 473                        if (BAD_ADDR(k) ||
 474                            eppnt->p_filesz > eppnt->p_memsz ||
 475                            eppnt->p_memsz > TASK_SIZE ||
 476                            TASK_SIZE - eppnt->p_memsz < k) {
 477                                error = -ENOMEM;
 478                                goto out_close;
 479                        }
 480
 481                        /*
 482                         * Find the end of the file mapping for this phdr, and
 483                         * keep track of the largest address we see for this.
 484                         */
 485                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
 486                        if (k > elf_bss)
 487                                elf_bss = k;
 488
 489                        /*
 490                         * Do the same thing for the memory mapping - between
 491                         * elf_bss and last_bss is the bss section.
 492                         */
 493                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
 494                        if (k > last_bss)
 495                                last_bss = k;
 496                }
 497        }
 498
 499        /*
 500         * Now fill out the bss section.  First pad the last page up
 501         * to the page boundary, and then perform a mmap to make sure
 502         * that there are zero-mapped pages up to and including the 
 503         * last bss page.
 504         */
 505        if (padzero(elf_bss)) {
 506                error = -EFAULT;
 507                goto out_close;
 508        }
 509
 510        /* What we have mapped so far */
 511        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 512
 513        /* Map the last of the bss segment */
 514        if (last_bss > elf_bss) {
 515                down_write(&current->mm->mmap_sem);
 516                error = do_brk(elf_bss, last_bss - elf_bss);
 517                up_write(&current->mm->mmap_sem);
 518                if (BAD_ADDR(error))
 519                        goto out_close;
 520        }
 521
 522        error = load_addr;
 523
 524out_close:
 525        kfree(elf_phdata);
 526out:
 527        return error;
 528}
 529
 530/*
 531 * These are the functions used to load ELF style executables and shared
 532 * libraries.  There is no binary dependent code anywhere else.
 533 */
 534
 /*
  * Interpreter kind codes.  NOTE(review): no use of these two constants is
  * visible in this part of the file - confirm they are still referenced
  * before relying on them.
  */
 535#define INTERPRETER_NONE 0
 536#define INTERPRETER_ELF 2
 537
 /*
  * Default mask for the random stack offset, used unless STACK_RND_MASK is
  * already defined.  randomize_stack_top() ANDs a random int with this
  * mask and shifts the result left by PAGE_SHIFT, so the mask counts pages.
  */
 538#ifndef STACK_RND_MASK
 539#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
 540#endif
 541
 542static unsigned long randomize_stack_top(unsigned long stack_top)
 543{
 544        unsigned int random_variable = 0;
 545
 546        if ((current->flags & PF_RANDOMIZE) &&
 547                !(current->personality & ADDR_NO_RANDOMIZE)) {
 548                random_variable = get_random_int() & STACK_RND_MASK;
 549                random_variable <<= PAGE_SHIFT;
 550        }
 551#ifdef CONFIG_STACK_GROWSUP
 552        return PAGE_ALIGN(stack_top) + random_variable;
 553#else
 554        return PAGE_ALIGN(stack_top) - random_variable;
 555#endif
 556}
 557
 558static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 559{
 560        struct file *interpreter = NULL; /* to shut gcc up */
 561        unsigned long load_addr = 0, load_bias = 0;
 562        int load_addr_set = 0;
 563        char * elf_interpreter = NULL;
 564        unsigned long error;
 565        struct elf_phdr *elf_ppnt, *elf_phdata;
 566        unsigned long elf_bss, elf_brk;
 567        int elf_exec_fileno;
 568        int retval, i;
 569        unsigned int size;
 570        unsigned long elf_entry;
 571        unsigned long interp_load_addr = 0;
 572        unsigned long start_code, end_code, start_data, end_data;
 573        unsigned long reloc_func_desc = 0;
 574        int executable_stack = EXSTACK_DEFAULT;
 575        unsigned long def_flags = 0;
 576        struct {
 577                struct elfhdr elf_ex;
 578                struct elfhdr interp_elf_ex;
 579        } *loc;
 580
 581        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 582        if (!loc) {
 583                retval = -ENOMEM;
 584                goto out_ret;
 585        }
 586        
 587        /* Get the exec-header */
 588        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 589
 590        retval = -ENOEXEC;
 591        /* First of all, some simple consistency checks */
 592        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 593                goto out;
 594
 595        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 596                goto out;
 597        if (!elf_check_arch(&loc->elf_ex))
 598                goto out;
 599        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 600                goto out;
 601
 602        /* Now read in all of the header information */
 603        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 604                goto out;
 605        if (loc->elf_ex.e_phnum < 1 ||
 606                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 607                goto out;
 608        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 609        retval = -ENOMEM;
 610        elf_phdata = kmalloc(size, GFP_KERNEL);
 611        if (!elf_phdata)
 612                goto out;
 613
 614        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 615                             (char *)elf_phdata, size);
 616        if (retval != size) {
 617                if (retval >= 0)
 618                        retval = -EIO;
 619                goto out_free_ph;
 620        }
 621
 622        retval = get_unused_fd();
 623        if (retval < 0)
 624                goto out_free_ph;
 625        get_file(bprm->file);
 626        fd_install(elf_exec_fileno = retval, bprm->file);
 627
 628        elf_ppnt = elf_phdata;
 629        elf_bss = 0;
 630        elf_brk = 0;
 631
 632        start_code = ~0UL;
 633        end_code = 0;
 634        start_data = 0;
 635        end_data = 0;
 636
 637        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 638                if (elf_ppnt->p_type == PT_INTERP) {
 639                        /* This is the program interpreter used for
 640                         * shared libraries - for now assume that this
 641                         * is an a.out format binary
 642                         */
 643                        retval = -ENOEXEC;
 644                        if (elf_ppnt->p_filesz > PATH_MAX || 
 645                            elf_ppnt->p_filesz < 2)
 646                                goto out_free_file;
 647
 648                        retval = -ENOMEM;
 649                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 650                                                  GFP_KERNEL);
 651                        if (!elf_interpreter)
 652                                goto out_free_file;
 653
 654                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 655                                             elf_interpreter,
 656                                             elf_ppnt->p_filesz);
 657                        if (retval != elf_ppnt->p_filesz) {
 658                                if (retval >= 0)
 659                                        retval = -EIO;
 660                                goto out_free_interp;
 661                        }
 662                        /* make sure path is NULL terminated */
 663                        retval = -ENOEXEC;
 664                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 665                                goto out_free_interp;
 666
 667                        /*
 668                         * The early SET_PERSONALITY here is so that the lookup
 669                         * for the interpreter happens in the namespace of the 
 670                         * to-be-execed image.  SET_PERSONALITY can select an
 671                         * alternate root.
 672                         *
 673                         * However, SET_PERSONALITY is NOT allowed to switch
 674                         * this task into the new images's memory mapping
 675                         * policy - that is, TASK_SIZE must still evaluate to
 676                         * that which is appropriate to the execing application.
 677                         * This is because exit_mmap() needs to have TASK_SIZE
 678                         * evaluate to the size of the old image.
 679                         *
 680                         * So if (say) a 64-bit application is execing a 32-bit
 681                         * application it is the architecture's responsibility
 682                         * to defer changing the value of TASK_SIZE until the
 683                         * switch really is going to happen - do this in
 684                         * flush_thread().      - akpm
 685                         */
 686                        SET_PERSONALITY(loc->elf_ex, 0);
 687
 688                        interpreter = open_exec(elf_interpreter);
 689                        retval = PTR_ERR(interpreter);
 690                        if (IS_ERR(interpreter))
 691                                goto out_free_interp;
 692
 693                        /*
 694                         * If the binary is not readable then enforce
 695                         * mm->dumpable = 0 regardless of the interpreter's
 696                         * permissions.
 697                         */
 698                        if (file_permission(interpreter, MAY_READ) < 0)
 699                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 700
 701                        retval = kernel_read(interpreter, 0, bprm->buf,
 702                                             BINPRM_BUF_SIZE);
 703                        if (retval != BINPRM_BUF_SIZE) {
 704                                if (retval >= 0)
 705                                        retval = -EIO;
 706                                goto out_free_dentry;
 707                        }
 708
 709                        /* Get the exec headers */
 710                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 711                        break;
 712                }
 713                elf_ppnt++;
 714        }
 715
 716        elf_ppnt = elf_phdata;
 717        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 718                if (elf_ppnt->p_type == PT_GNU_STACK) {
 719                        if (elf_ppnt->p_flags & PF_X)
 720                                executable_stack = EXSTACK_ENABLE_X;
 721                        else
 722                                executable_stack = EXSTACK_DISABLE_X;
 723                        break;
 724                }
 725
 726        /* Some simple consistency checks for the interpreter */
 727        if (elf_interpreter) {
 728                retval = -ELIBBAD;
 729                /* Not an ELF interpreter */
 730                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 731                        goto out_free_dentry;
 732                /* Verify the interpreter has a valid arch */
 733                if (!elf_check_arch(&loc->interp_elf_ex))
 734                        goto out_free_dentry;
 735        } else {
 736                /* Executables without an interpreter also need a personality  */
 737                SET_PERSONALITY(loc->elf_ex, 0);
 738        }
 739
 740        /* Flush all traces of the currently running executable */
 741        retval = flush_old_exec(bprm);
 742        if (retval)
 743                goto out_free_dentry;
 744
 745        /* OK, This is the point of no return */
 746        current->flags &= ~PF_FORKNOEXEC;
 747        current->mm->def_flags = def_flags;
 748
 749        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 750           may depend on the personality.  */
 751        SET_PERSONALITY(loc->elf_ex, 0);
 752        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 753                current->personality |= READ_IMPLIES_EXEC;
 754
 755        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 756                current->flags |= PF_RANDOMIZE;
 757        arch_pick_mmap_layout(current->mm);
 758
 759        /* Do this so that we can load the interpreter, if need be.  We will
 760           change some of these later */
 761        current->mm->free_area_cache = current->mm->mmap_base;
 762        current->mm->cached_hole_size = 0;
 763        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 764                                 executable_stack);
 765        if (retval < 0) {
 766                send_sig(SIGKILL, current, 0);
 767                goto out_free_dentry;
 768        }
 769        
 770        current->mm->start_stack = bprm->p;
 771
 772        /* Now we do a little grungy work by mmaping the ELF image into
 773           the correct location in memory. */
 774        for(i = 0, elf_ppnt = elf_phdata;
 775            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 776                int elf_prot = 0, elf_flags;
 777                unsigned long k, vaddr;
 778
 779                if (elf_ppnt->p_type != PT_LOAD)
 780                        continue;
 781
 782                if (unlikely (elf_brk > elf_bss)) {
 783                        unsigned long nbyte;
 784                    
 785                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 786                           before this one. Map anonymous pages, if needed,
 787                           and clear the area.  */
 788                        retval = set_brk (elf_bss + load_bias,
 789                                          elf_brk + load_bias);
 790                        if (retval) {
 791                                send_sig(SIGKILL, current, 0);
 792                                goto out_free_dentry;
 793                        }
 794                        nbyte = ELF_PAGEOFFSET(elf_bss);
 795                        if (nbyte) {
 796                                nbyte = ELF_MIN_ALIGN - nbyte;
 797                                if (nbyte > elf_brk - elf_bss)
 798                                        nbyte = elf_brk - elf_bss;
 799                                if (clear_user((void __user *)elf_bss +
 800                                                        load_bias, nbyte)) {
 801                                        /*
 802                                         * This bss-zeroing can fail if the ELF
 803                                         * file specifies odd protections. So
 804                                         * we don't check the return value
 805                                         */
 806                                }
 807                        }
 808                }
 809
 810                if (elf_ppnt->p_flags & PF_R)
 811                        elf_prot |= PROT_READ;
 812                if (elf_ppnt->p_flags & PF_W)
 813                        elf_prot |= PROT_WRITE;
 814                if (elf_ppnt->p_flags & PF_X)
 815                        elf_prot |= PROT_EXEC;
 816
 817                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 818
 819                vaddr = elf_ppnt->p_vaddr;
 820                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 821                        elf_flags |= MAP_FIXED;
 822                } else if (loc->elf_ex.e_type == ET_DYN) {
 823                        /* Try and get dynamic programs out of the way of the
 824                         * default mmap base, as well as whatever program they
 825                         * might try to exec.  This is because the brk will
 826                         * follow the loader, and is not movable.  */
 827#ifdef CONFIG_X86
 828                        load_bias = 0;
 829#else
 830                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 831#endif
 832                }
 833
 834                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 835                                elf_prot, elf_flags, 0);
 836                if (BAD_ADDR(error)) {
 837                        send_sig(SIGKILL, current, 0);
 838                        retval = IS_ERR((void *)error) ?
 839                                PTR_ERR((void*)error) : -EINVAL;
 840                        goto out_free_dentry;
 841                }
 842
 843                if (!load_addr_set) {
 844                        load_addr_set = 1;
 845                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 846                        if (loc->elf_ex.e_type == ET_DYN) {
 847                                load_bias += error -
 848                                             ELF_PAGESTART(load_bias + vaddr);
 849                                load_addr += load_bias;
 850                                reloc_func_desc = load_bias;
 851                        }
 852                }
 853                k = elf_ppnt->p_vaddr;
 854                if (k < start_code)
 855                        start_code = k;
 856                if (start_data < k)
 857                        start_data = k;
 858
 859                /*
 860                 * Check to see if the section's size will overflow the
 861                 * allowed task size. Note that p_filesz must always be
 862                 * <= p_memsz so it is only necessary to check p_memsz.
 863                 */
 864                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 865                    elf_ppnt->p_memsz > TASK_SIZE ||
 866                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 867                        /* set_brk can never work. Avoid overflows. */
 868                        send_sig(SIGKILL, current, 0);
 869                        retval = -EINVAL;
 870                        goto out_free_dentry;
 871                }
 872
 873                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 874
 875                if (k > elf_bss)
 876                        elf_bss = k;
 877                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 878                        end_code = k;
 879                if (end_data < k)
 880                        end_data = k;
 881                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 882                if (k > elf_brk)
 883                        elf_brk = k;
 884        }
 885
 886        loc->elf_ex.e_entry += load_bias;
 887        elf_bss += load_bias;
 888        elf_brk += load_bias;
 889        start_code += load_bias;
 890        end_code += load_bias;
 891        start_data += load_bias;
 892        end_data += load_bias;
 893
 894        /* Calling set_brk effectively mmaps the pages that we need
 895         * for the bss and break sections.  We must do this before
 896         * mapping in the interpreter, to make sure it doesn't wind
 897         * up getting placed where the bss needs to go.
 898         */
 899        retval = set_brk(elf_bss, elf_brk);
 900        if (retval) {
 901                send_sig(SIGKILL, current, 0);
 902                goto out_free_dentry;
 903        }
 904        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 905                send_sig(SIGSEGV, current, 0);
 906                retval = -EFAULT; /* Nobody gets to see this, but.. */
 907                goto out_free_dentry;
 908        }
 909
 910        if (elf_interpreter) {
 911                unsigned long uninitialized_var(interp_map_addr);
 912
 913                elf_entry = load_elf_interp(&loc->interp_elf_ex,
 914                                            interpreter,
 915                                            &interp_map_addr,
 916                                            load_bias);
 917                if (!IS_ERR((void *)elf_entry)) {
 918                        /*
 919                         * load_elf_interp() returns relocation
 920                         * adjustment
 921                         */
 922                        interp_load_addr = elf_entry;
 923                        elf_entry += loc->interp_elf_ex.e_entry;
 924                }
 925                if (BAD_ADDR(elf_entry)) {
 926                        force_sig(SIGSEGV, current);
 927                        retval = IS_ERR((void *)elf_entry) ?
 928                                        (int)elf_entry : -EINVAL;
 929                        goto out_free_dentry;
 930                }
 931                reloc_func_desc = interp_load_addr;
 932
 933                allow_write_access(interpreter);
 934                fput(interpreter);
 935                kfree(elf_interpreter);
 936        } else {
 937                elf_entry = loc->elf_ex.e_entry;
 938                if (BAD_ADDR(elf_entry)) {
 939                        force_sig(SIGSEGV, current);
 940                        retval = -EINVAL;
 941                        goto out_free_dentry;
 942                }
 943        }
 944
 945        kfree(elf_phdata);
 946
 947        sys_close(elf_exec_fileno);
 948
 949        set_binfmt(&elf_format);
 950
 951#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 952        retval = arch_setup_additional_pages(bprm, executable_stack);
 953        if (retval < 0) {
 954                send_sig(SIGKILL, current, 0);
 955                goto out;
 956        }
 957#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 958
 959        compute_creds(bprm);
 960        current->flags &= ~PF_FORKNOEXEC;
 961        retval = create_elf_tables(bprm, &loc->elf_ex,
 962                          load_addr, interp_load_addr);
 963        if (retval < 0) {
 964                send_sig(SIGKILL, current, 0);
 965                goto out;
 966        }
 967        /* N.B. passed_fileno might not be initialized? */
 968        current->mm->end_code = end_code;
 969        current->mm->start_code = start_code;
 970        current->mm->start_data = start_data;
 971        current->mm->end_data = end_data;
 972        current->mm->start_stack = bprm->p;
 973
 974#ifdef arch_randomize_brk
 975        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
 976                current->mm->brk = current->mm->start_brk =
 977                        arch_randomize_brk(current->mm);
 978#endif
 979
 980        if (current->personality & MMAP_PAGE_ZERO) {
 981                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 982                   and some applications "depend" upon this behavior.
 983                   Since we do not have the power to recompile these, we
 984                   emulate the SVr4 behavior. Sigh. */
 985                down_write(&current->mm->mmap_sem);
 986                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 987                                MAP_FIXED | MAP_PRIVATE, 0);
 988                up_write(&current->mm->mmap_sem);
 989        }
 990
 991#ifdef ELF_PLAT_INIT
 992        /*
 993         * The ABI may specify that certain registers be set up in special
 994         * ways (on i386 %edx is the address of a DT_FINI function, for
 995         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
 996         * that the e_entry field is the address of the function descriptor
 997         * for the startup routine, rather than the address of the startup
 998         * routine itself.  This macro performs whatever initialization to
 999         * the regs structure is required as well as any relocations to the
1000         * function descriptor entries when executing dynamically links apps.
1001         */
1002        ELF_PLAT_INIT(regs, reloc_func_desc);
1003#endif
1004
1005        start_thread(regs, elf_entry, bprm->p);
1006        retval = 0;
1007out:
1008        kfree(loc);
1009out_ret:
1010        return retval;
1011
1012        /* error cleanup */
1013out_free_dentry:
1014        allow_write_access(interpreter);
1015        if (interpreter)
1016                fput(interpreter);
1017out_free_interp:
1018        kfree(elf_interpreter);
1019out_free_file:
1020        sys_close(elf_exec_fileno);
1021out_free_ph:
1022        kfree(elf_phdata);
1023        goto out;
1024}
1025
1026/* This is really simpleminded and specialized - we are loading an
1027   a.out library that is given an ELF header. */
1028static int load_elf_library(struct file *file)
1029{
1030        struct elf_phdr *elf_phdata;
1031        struct elf_phdr *eppnt;
1032        unsigned long elf_bss, bss, len;
1033        int retval, error, i, j;
1034        struct elfhdr elf_ex;
1035
1036        error = -ENOEXEC;
1037        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1038        if (retval != sizeof(elf_ex))
1039                goto out;
1040
1041        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1042                goto out;
1043
1044        /* First of all, some simple consistency checks */
1045        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1046            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1047                goto out;
1048
1049        /* Now read in all of the header information */
1050
1051        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1052        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1053
1054        error = -ENOMEM;
1055        elf_phdata = kmalloc(j, GFP_KERNEL);
1056        if (!elf_phdata)
1057                goto out;
1058
1059        eppnt = elf_phdata;
1060        error = -ENOEXEC;
1061        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1062        if (retval != j)
1063                goto out_free_ph;
1064
1065        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1066                if ((eppnt + i)->p_type == PT_LOAD)
1067                        j++;
1068        if (j != 1)
1069                goto out_free_ph;
1070
1071        while (eppnt->p_type != PT_LOAD)
1072                eppnt++;
1073
1074        /* Now use mmap to map the library into memory. */
1075        down_write(&current->mm->mmap_sem);
1076        error = do_mmap(file,
1077                        ELF_PAGESTART(eppnt->p_vaddr),
1078                        (eppnt->p_filesz +
1079                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1080                        PROT_READ | PROT_WRITE | PROT_EXEC,
1081                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1082                        (eppnt->p_offset -
1083                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1084        up_write(&current->mm->mmap_sem);
1085        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1086                goto out_free_ph;
1087
1088        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1089        if (padzero(elf_bss)) {
1090                error = -EFAULT;
1091                goto out_free_ph;
1092        }
1093
1094        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1095                            ELF_MIN_ALIGN - 1);
1096        bss = eppnt->p_memsz + eppnt->p_vaddr;
1097        if (bss > len) {
1098                down_write(&current->mm->mmap_sem);
1099                do_brk(len, bss - len);
1100                up_write(&current->mm->mmap_sem);
1101        }
1102        error = 0;
1103
1104out_free_ph:
1105        kfree(elf_phdata);
1106out:
1107        return error;
1108}
1109
1110/*
1111 * Note that some platforms still use traditional core dumps and not
1112 * the ELF core dump.  Each platform can select it as appropriate.
1113 */
1114#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1115
1116/*
1117 * ELF core dumper
1118 *
1119 * Modelled on fs/exec.c:aout_core_dump()
1120 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1121 */
1122/*
1123 * These are the only things you should do on a core-file: use only these
1124 * functions to write out all the necessary info.
1125 */
1126static int dump_write(struct file *file, const void *addr, int nr)
1127{
1128        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1129}
1130
1131static int dump_seek(struct file *file, loff_t off)
1132{
1133        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1134                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1135                        return 0;
1136        } else {
1137                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1138                if (!buf)
1139                        return 0;
1140                while (off > 0) {
1141                        unsigned long n = off;
1142                        if (n > PAGE_SIZE)
1143                                n = PAGE_SIZE;
1144                        if (!dump_write(file, buf, n))
1145                                return 0;
1146                        off -= n;
1147                }
1148                free_page((unsigned long)buf);
1149        }
1150        return 1;
1151}
1152
1153/*
1154 * Decide what to dump of a segment, part, all or none.
1155 */
1156static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157                                   unsigned long mm_flags)
1158{
1159        /* The vma can be set up to tell us the answer directly.  */
1160        if (vma->vm_flags & VM_ALWAYSDUMP)
1161                goto whole;
1162
1163        /* Do not dump I/O mapped devices or special mappings */
1164        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1165                return 0;
1166
1167#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1168
1169        /* By default, dump shared memory if mapped from an anonymous file. */
1170        if (vma->vm_flags & VM_SHARED) {
1171                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1172                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1173                        goto whole;
1174                return 0;
1175        }
1176
1177        /* Dump segments that have been written to.  */
1178        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1179                goto whole;
1180        if (vma->vm_file == NULL)
1181                return 0;
1182
1183        if (FILTER(MAPPED_PRIVATE))
1184                goto whole;
1185
1186        /*
1187         * If this looks like the beginning of a DSO or executable mapping,
1188         * check for an ELF header.  If we find one, dump the first page to
1189         * aid in determining what was mapped here.
1190         */
1191        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1192                u32 __user *header = (u32 __user *) vma->vm_start;
1193                u32 word;
1194                /*
1195                 * Doing it this way gets the constant folded by GCC.
1196                 */
1197                union {
1198                        u32 cmp;
1199                        char elfmag[SELFMAG];
1200                } magic;
1201                BUILD_BUG_ON(SELFMAG != sizeof word);
1202                magic.elfmag[EI_MAG0] = ELFMAG0;
1203                magic.elfmag[EI_MAG1] = ELFMAG1;
1204                magic.elfmag[EI_MAG2] = ELFMAG2;
1205                magic.elfmag[EI_MAG3] = ELFMAG3;
1206                if (get_user(word, header) == 0 && word == magic.cmp)
1207                        return PAGE_SIZE;
1208        }
1209
1210#undef  FILTER
1211
1212        return 0;
1213
1214whole:
1215        return vma->vm_end - vma->vm_start;
1216}
1217
/* An ELF note in memory */
struct memelfnote {
        const char *name;       /* NUL-terminated note name */
        int type;               /* note type, written out as n_type */
        unsigned int datasz;    /* size of data in bytes, written as n_descsz */
        void *data;             /* note contents */
};
1226
1227static int notesize(struct memelfnote *en)
1228{
1229        int sz;
1230
1231        sz = sizeof(struct elf_note);
1232        sz += roundup(strlen(en->name) + 1, 4);
1233        sz += roundup(en->datasz, 4);
1234
1235        return sz;
1236}
1237
/*
 * Write nr bytes at addr to the enclosing function's "file" and advance
 * *foffset by nr.  A failed write makes the enclosing function return 0.
 * Note that nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)                           \
        do {                                                    \
                if (!dump_write(file, (addr), (nr)))            \
                        return 0;                               \
                *foffset += (nr);                               \
        } while(0)
1240
1241static int alignfile(struct file *file, loff_t *foffset)
1242{
1243        static const char buf[4] = { 0, };
1244        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1245        return 1;
1246}
1247
1248static int writenote(struct memelfnote *men, struct file *file,
1249                        loff_t *foffset)
1250{
1251        struct elf_note en;
1252        en.n_namesz = strlen(men->name) + 1;
1253        en.n_descsz = men->datasz;
1254        en.n_type = men->type;
1255
1256        DUMP_WRITE(&en, sizeof(en), foffset);
1257        DUMP_WRITE(men->name, en.n_namesz, foffset);
1258        if (!alignfile(file, foffset))
1259                return 0;
1260        DUMP_WRITE(men->data, men->datasz, foffset);
1261        if (!alignfile(file, foffset))
1262                return 0;
1263
1264        return 1;
1265}
1266#undef DUMP_WRITE
1267
/*
 * Statement macros for the core dump routine.  Both rely on the enclosing
 * function providing a "file" variable and an "end_coredump" label.
 * DUMP_WRITE additionally adds nr to that function's "size" and jumps to
 * end_coredump when the new size exceeds "limit" or the write fails.
 *
 * Each expands to a bare "if" statement rather than do { } while (0), so
 * be careful when using one directly in front of an "else".
 */
#define DUMP_WRITE(addr, nr)                                            \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr)))  \
                goto end_coredump;
#define DUMP_SEEK(off)                          \
        if (!dump_seek(file, (off)))            \
                goto end_coredump;
1274
1275static void fill_elf_header(struct elfhdr *elf, int segs,
1276                            u16 machine, u32 flags, u8 osabi)
1277{
1278        memset(elf, 0, sizeof(*elf));
1279
1280        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1281        elf->e_ident[EI_CLASS] = ELF_CLASS;
1282        elf->e_ident[EI_DATA] = ELF_DATA;
1283        elf->e_ident[EI_VERSION] = EV_CURRENT;
1284        elf->e_ident[EI_OSABI] = ELF_OSABI;
1285
1286        elf->e_type = ET_CORE;
1287        elf->e_machine = machine;
1288        elf->e_version = EV_CURRENT;
1289        elf->e_phoff = sizeof(struct elfhdr);
1290        elf->e_flags = flags;
1291        elf->e_ehsize = sizeof(struct elfhdr);
1292        elf->e_phentsize = sizeof(struct elf_phdr);
1293        elf->e_phnum = segs;
1294
1295        return;
1296}
1297
1298static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1299{
1300        phdr->p_type = PT_NOTE;
1301        phdr->p_offset = offset;
1302        phdr->p_vaddr = 0;
1303        phdr->p_paddr = 0;
1304        phdr->p_filesz = sz;
1305        phdr->p_memsz = 0;
1306        phdr->p_flags = 0;
1307        phdr->p_align = 0;
1308        return;
1309}
1310
1311static void fill_note(struct memelfnote *note, const char *name, int type, 
1312                unsigned int sz, void *data)
1313{
1314        note->name = name;
1315        note->type = type;
1316        note->datasz = sz;
1317        note->data = data;
1318        return;
1319}
1320
1321/*
1322 * fill up all the fields in prstatus from the given task struct, except
1323 * registers which need to be filled up separately.
1324 */
1325static void fill_prstatus(struct elf_prstatus *prstatus,
1326                struct task_struct *p, long signr)
1327{
1328        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1329        prstatus->pr_sigpend = p->pending.signal.sig[0];
1330        prstatus->pr_sighold = p->blocked.sig[0];
1331        prstatus->pr_pid = task_pid_vnr(p);
1332        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1333        prstatus->pr_pgrp = task_pgrp_vnr(p);
1334        prstatus->pr_sid = task_session_vnr(p);
1335        if (thread_group_leader(p)) {
1336                /*
1337                 * This is the record for the group leader.  Add in the
1338                 * cumulative times of previous dead threads.  This total
1339                 * won't include the time of each live thread whose state
1340                 * is included in the core dump.  The final total reported
1341                 * to our parent process when it calls wait4 will include
1342                 * those sums as well as the little bit more time it takes
1343                 * this and each other thread to finish dying after the
1344                 * core dump synchronization phase.
1345                 */
1346                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1347                                   &prstatus->pr_utime);
1348                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1349                                   &prstatus->pr_stime);
1350        } else {
1351                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1352                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1353        }
1354        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1355        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1356}
1357
1358static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1359                       struct mm_struct *mm)
1360{
1361        unsigned int i, len;
1362        
1363        /* first copy the parameters from user space */
1364        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1365
1366        len = mm->arg_end - mm->arg_start;
1367        if (len >= ELF_PRARGSZ)
1368                len = ELF_PRARGSZ-1;
1369        if (copy_from_user(&psinfo->pr_psargs,
1370                           (const char __user *)mm->arg_start, len))
1371                return -EFAULT;
1372        for(i = 0; i < len; i++)
1373                if (psinfo->pr_psargs[i] == 0)
1374                        psinfo->pr_psargs[i] = ' ';
1375        psinfo->pr_psargs[len] = 0;
1376
1377        psinfo->pr_pid = task_pid_vnr(p);
1378        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1379        psinfo->pr_pgrp = task_pgrp_vnr(p);
1380        psinfo->pr_sid = task_session_vnr(p);
1381
1382        i = p->state ? ffz(~p->state) + 1 : 0;
1383        psinfo->pr_state = i;
1384        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1385        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1386        psinfo->pr_nice = task_nice(p);
1387        psinfo->pr_flag = p->flags;
1388        SET_UID(psinfo->pr_uid, p->uid);
1389        SET_GID(psinfo->pr_gid, p->gid);
1390        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1391        
1392        return 0;
1393}
1394
1395static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1396{
1397        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1398        int i = 0;
1399        do
1400                i += 2;
1401        while (auxv[i - 2] != AT_NULL);
1402        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1403}
1404
1405#ifdef CORE_DUMP_USE_REGSET
1406#include <linux/regset.h>
1407
1408struct elf_thread_core_info {
1409        struct elf_thread_core_info *next;
1410        struct task_struct *task;
1411        struct elf_prstatus prstatus;
1412        struct memelfnote notes[0];
1413};
1414
1415struct elf_note_info {
1416        struct elf_thread_core_info *thread;
1417        struct memelfnote psinfo;
1418        struct memelfnote auxv;
1419        size_t size;
1420        int thread_notes;
1421};
1422
1423/*
1424 * When a regset has a writeback hook, we call it on each thread before
1425 * dumping user memory.  On register window machines, this makes sure the
1426 * user memory backing the register data is up to date before we read it.
1427 */
1428static void do_thread_regset_writeback(struct task_struct *task,
1429                                       const struct user_regset *regset)
1430{
1431        if (regset->writeback)
1432                regset->writeback(task, regset, 1);
1433}
1434
1435static int fill_thread_core_info(struct elf_thread_core_info *t,
1436                                 const struct user_regset_view *view,
1437                                 long signr, size_t *total)
1438{
1439        unsigned int i;
1440
1441        /*
1442         * NT_PRSTATUS is the one special case, because the regset data
1443         * goes into the pr_reg field inside the note contents, rather
1444         * than being the whole note contents.  We fill the reset in here.
1445         * We assume that regset 0 is NT_PRSTATUS.
1446         */
1447        fill_prstatus(&t->prstatus, t->task, signr);
1448        (void) view->regsets[0].get(t->task, &view->regsets[0],
1449                                    0, sizeof(t->prstatus.pr_reg),
1450                                    &t->prstatus.pr_reg, NULL);
1451
1452        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1453                  sizeof(t->prstatus), &t->prstatus);
1454        *total += notesize(&t->notes[0]);
1455
1456        do_thread_regset_writeback(t->task, &view->regsets[0]);
1457
1458        /*
1459         * Each other regset might generate a note too.  For each regset
1460         * that has no core_note_type or is inactive, we leave t->notes[i]
1461         * all zero and we'll know to skip writing it later.
1462         */
1463        for (i = 1; i < view->n; ++i) {
1464                const struct user_regset *regset = &view->regsets[i];
1465                do_thread_regset_writeback(t->task, regset);
1466                if (regset->core_note_type &&
1467                    (!regset->active || regset->active(t->task, regset))) {
1468                        int ret;
1469                        size_t size = regset->n * regset->size;
1470                        void *data = kmalloc(size, GFP_KERNEL);
1471                        if (unlikely(!data))
1472                                return 0;
1473                        ret = regset->get(t->task, regset,
1474                                          0, size, data, NULL);
1475                        if (unlikely(ret))
1476                                kfree(data);
1477                        else {
1478                                if (regset->core_note_type != NT_PRFPREG)
1479                                        fill_note(&t->notes[i], "LINUX",
1480                                                  regset->core_note_type,
1481                                                  size, data);
1482                                else {
1483                                        t->prstatus.pr_fpvalid = 1;
1484                                        fill_note(&t->notes[i], "CORE",
1485                                                  NT_PRFPREG, size, data);
1486                                }
1487                                *total += notesize(&t->notes[i]);
1488                        }
1489                }
1490        }
1491
1492        return 1;
1493}
1494
1495static int fill_note_info(struct elfhdr *elf, int phdrs,
1496                          struct elf_note_info *info,
1497                          long signr, struct pt_regs *regs)
1498{
1499        struct task_struct *dump_task = current;
1500        const struct user_regset_view *view = task_user_regset_view(dump_task);
1501        struct elf_thread_core_info *t;
1502        struct elf_prpsinfo *psinfo;
1503        struct core_thread *ct;
1504        unsigned int i;
1505
1506        info->size = 0;
1507        info->thread = NULL;
1508
1509        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1510        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1511
1512        if (psinfo == NULL)
1513                return 0;
1514
1515        /*
1516         * Figure out how many notes we're going to need for each thread.
1517         */
1518        info->thread_notes = 0;
1519        for (i = 0; i < view->n; ++i)
1520                if (view->regsets[i].core_note_type != 0)
1521                        ++info->thread_notes;
1522
1523        /*
1524         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1525         * since it is our one special case.
1526         */
1527        if (unlikely(info->thread_notes == 0) ||
1528            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1529                WARN_ON(1);
1530                return 0;
1531        }
1532
1533        /*
1534         * Initialize the ELF file header.
1535         */
1536        fill_elf_header(elf, phdrs,
1537                        view->e_machine, view->e_flags, view->ei_osabi);
1538
1539        /*
1540         * Allocate a structure for each thread.
1541         */
1542        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1543                t = kzalloc(offsetof(struct elf_thread_core_info,
1544                                     notes[info->thread_notes]),
1545                            GFP_KERNEL);
1546                if (unlikely(!t))
1547                        return 0;
1548
1549                t->task = ct->task;
1550                if (ct->task == dump_task || !info->thread) {
1551                        t->next = info->thread;
1552                        info->thread = t;
1553                } else {
1554                        /*
1555                         * Make sure to keep the original task at
1556                         * the head of the list.
1557                         */
1558                        t->next = info->thread->next;
1559                        info->thread->next = t;
1560                }
1561        }
1562
1563        /*
1564         * Now fill in each thread's information.
1565         */
1566        for (t = info->thread; t != NULL; t = t->next)
1567                if (!fill_thread_core_info(t, view, signr, &info->size))
1568                        return 0;
1569
1570        /*
1571         * Fill in the two process-wide notes.
1572         */
1573        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1574        info->size += notesize(&info->psinfo);
1575
1576        fill_auxv_note(&info->auxv, current->mm);
1577        info->size += notesize(&info->auxv);
1578
1579        return 1;
1580}
1581
1582static size_t get_note_info_size(struct elf_note_info *info)
1583{
1584        return info->size;
1585}
1586
1587/*
1588 * Write all the notes for each thread.  When writing the first thread, the
1589 * process-wide notes are interleaved after the first thread-specific note.
1590 */
1591static int write_note_info(struct elf_note_info *info,
1592                           struct file *file, loff_t *foffset)
1593{
1594        bool first = 1;
1595        struct elf_thread_core_info *t = info->thread;
1596
1597        do {
1598                int i;
1599
1600                if (!writenote(&t->notes[0], file, foffset))
1601                        return 0;
1602
1603                if (first && !writenote(&info->psinfo, file, foffset))
1604                        return 0;
1605                if (first && !writenote(&info->auxv, file, foffset))
1606                        return 0;
1607
1608                for (i = 1; i < info->thread_notes; ++i)
1609                        if (t->notes[i].data &&
1610                            !writenote(&t->notes[i], file, foffset))
1611                                return 0;
1612
1613                first = 0;
1614                t = t->next;
1615        } while (t);
1616
1617        return 1;
1618}
1619
1620static void free_note_info(struct elf_note_info *info)
1621{
1622        struct elf_thread_core_info *threads = info->thread;
1623        while (threads) {
1624                unsigned int i;
1625                struct elf_thread_core_info *t = threads;
1626                threads = t->next;
1627                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1628                for (i = 1; i < info->thread_notes; ++i)
1629                        kfree(t->notes[i].data);
1630                kfree(t);
1631        }
1632        kfree(info->psinfo.data);
1633}
1634
1635#else
1636
/*
 * Here is the structure in which status of each thread is captured.
 *
 * One instance is allocated per additional thread and linked into
 * elf_note_info.thread_list; elf_dump_thread_status() fills in the register
 * snapshots and the notes[] entries that point at them.
 */
struct elf_thread_status
{
        struct list_head list;          /* link in elf_note_info.thread_list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* task this status was taken from */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* notes describing the data above */
        int num_notes;                  /* number of notes[] entries filled */
};
1650
1651/*
1652 * In order to add the specific thread information for the elf file format,
1653 * we need to keep a linked list of every threads pr_status and then create
1654 * a single section for them in the final core file.
1655 */
1656static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1657{
1658        int sz = 0;
1659        struct task_struct *p = t->thread;
1660        t->num_notes = 0;
1661
1662        fill_prstatus(&t->prstatus, p, signr);
1663        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1664        
1665        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1666                  &(t->prstatus));
1667        t->num_notes++;
1668        sz += notesize(&t->notes[0]);
1669
1670        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1671                                                                &t->fpu))) {
1672                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1673                          &(t->fpu));
1674                t->num_notes++;
1675                sz += notesize(&t->notes[1]);
1676        }
1677
1678#ifdef ELF_CORE_COPY_XFPREGS
1679        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1680                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1681                          sizeof(t->xfpu), &t->xfpu);
1682                t->num_notes++;
1683                sz += notesize(&t->notes[2]);
1684        }
1685#endif  
1686        return sz;
1687}
1688
/*
 * Everything collected for the notes segment of a core file (variant used
 * when per-thread state is kept in struct elf_thread_status).  The pointer
 * members are allocated by fill_note_info() and released by
 * free_note_info().
 */
struct elf_note_info {
        struct memelfnote *notes;       /* notes for the dumping task/process */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* elf_thread_status of other threads */
        elf_fpregset_t *fpu;            /* NT_PRFPREG data of the dumping task */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* ELF_CORE_XFPREG_TYPE data */
#endif
        int thread_status_size;         /* summed note size over thread_list */
        int numnote;                    /* number of notes[] entries in use */
};
1701
1702static int fill_note_info(struct elfhdr *elf, int phdrs,
1703                          struct elf_note_info *info,
1704                          long signr, struct pt_regs *regs)
1705{
1706#define NUM_NOTES       6
1707        struct list_head *t;
1708
1709        info->notes = NULL;
1710        info->prstatus = NULL;
1711        info->psinfo = NULL;
1712        info->fpu = NULL;
1713#ifdef ELF_CORE_COPY_XFPREGS
1714        info->xfpu = NULL;
1715#endif
1716        INIT_LIST_HEAD(&info->thread_list);
1717
1718        info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1719                              GFP_KERNEL);
1720        if (!info->notes)
1721                return 0;
1722        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1723        if (!info->psinfo)
1724                return 0;
1725        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1726        if (!info->prstatus)
1727                return 0;
1728        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1729        if (!info->fpu)
1730                return 0;
1731#ifdef ELF_CORE_COPY_XFPREGS
1732        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1733        if (!info->xfpu)
1734                return 0;
1735#endif
1736
1737        info->thread_status_size = 0;
1738        if (signr) {
1739                struct core_thread *ct;
1740                struct elf_thread_status *ets;
1741
1742                for (ct = current->mm->core_state->dumper.next;
1743                                                ct; ct = ct->next) {
1744                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1745                        if (!ets)
1746                                return 0;
1747
1748                        ets->thread = ct->task;
1749                        list_add(&ets->list, &info->thread_list);
1750                }
1751
1752                list_for_each(t, &info->thread_list) {
1753                        int sz;
1754
1755                        ets = list_entry(t, struct elf_thread_status, list);
1756                        sz = elf_dump_thread_status(signr, ets);
1757                        info->thread_status_size += sz;
1758                }
1759        }
1760        /* now collect the dump for the current */
1761        memset(info->prstatus, 0, sizeof(*info->prstatus));
1762        fill_prstatus(info->prstatus, current, signr);
1763        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1764
1765        /* Set up header */
1766        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1767
1768        /*
1769         * Set up the notes in similar form to SVR4 core dumps made
1770         * with info from their /proc.
1771         */
1772
1773        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1774                  sizeof(*info->prstatus), info->prstatus);
1775        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1776        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1777                  sizeof(*info->psinfo), info->psinfo);
1778
1779        info->numnote = 2;
1780
1781        fill_auxv_note(&info->notes[info->numnote++], current->mm);
1782
1783        /* Try to dump the FPU. */
1784        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1785                                                               info->fpu);
1786        if (info->prstatus->pr_fpvalid)
1787                fill_note(info->notes + info->numnote++,
1788                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1789#ifdef ELF_CORE_COPY_XFPREGS
1790        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1791                fill_note(info->notes + info->numnote++,
1792                          "LINUX", ELF_CORE_XFPREG_TYPE,
1793                          sizeof(*info->xfpu), info->xfpu);
1794#endif
1795
1796        return 1;
1797
1798#undef NUM_NOTES
1799}
1800
1801static size_t get_note_info_size(struct elf_note_info *info)
1802{
1803        int sz = 0;
1804        int i;
1805
1806        for (i = 0; i < info->numnote; i++)
1807                sz += notesize(info->notes + i);
1808
1809        sz += info->thread_status_size;
1810
1811        return sz;
1812}
1813
1814static int write_note_info(struct elf_note_info *info,
1815                           struct file *file, loff_t *foffset)
1816{
1817        int i;
1818        struct list_head *t;
1819
1820        for (i = 0; i < info->numnote; i++)
1821                if (!writenote(info->notes + i, file, foffset))
1822                        return 0;
1823
1824        /* write out the thread status notes section */
1825        list_for_each(t, &info->thread_list) {
1826                struct elf_thread_status *tmp =
1827                                list_entry(t, struct elf_thread_status, list);
1828
1829                for (i = 0; i < tmp->num_notes; i++)
1830                        if (!writenote(&tmp->notes[i], file, foffset))
1831                                return 0;
1832        }
1833
1834        return 1;
1835}
1836
1837static void free_note_info(struct elf_note_info *info)
1838{
1839        while (!list_empty(&info->thread_list)) {
1840                struct list_head *tmp = info->thread_list.next;
1841                list_del(tmp);
1842                kfree(list_entry(tmp, struct elf_thread_status, list));
1843        }
1844
1845        kfree(info->prstatus);
1846        kfree(info->psinfo);
1847        kfree(info->notes);
1848        kfree(info->fpu);
1849#ifdef ELF_CORE_COPY_XFPREGS
1850        kfree(info->xfpu);
1851#endif
1852}
1853
1854#endif
1855
1856static struct vm_area_struct *first_vma(struct task_struct *tsk,
1857                                        struct vm_area_struct *gate_vma)
1858{
1859        struct vm_area_struct *ret = tsk->mm->mmap;
1860
1861        if (ret)
1862                return ret;
1863        return gate_vma;
1864}
1865/*
1866 * Helper function for iterating across a vma list.  It ensures that the caller
1867 * will visit `gate_vma' prior to terminating the search.
1868 */
1869static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1870                                        struct vm_area_struct *gate_vma)
1871{
1872        struct vm_area_struct *ret;
1873
1874        ret = this_vma->vm_next;
1875        if (ret)
1876                return ret;
1877        if (this_vma == gate_vma)
1878                return NULL;
1879        return gate_vma;
1880}
1881
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * @signr: signal number, handed to fill_note_info().
 * @regs:  registers of the dumping task, handed to fill_note_info().
 * @file:  file the core image is written to.
 * @limit: upper bound compared against the running byte count `size'.
 *
 * Returns 0 if the ELF header could not be allocated or the note
 * information could not be collected; returns 1 once the notes have been
 * gathered, whether or not every subsequent write succeeded.
 *
 * NOTE(review): DUMP_WRITE, DUMP_SEEK and the ELF_CORE_* hooks are macros
 * defined outside this function; they presumably use the locals `file',
 * `size', `limit' and `offset' and jump to end_coredump on failure --
 * confirm against their definitions before renaming or reordering anything.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
        unsigned long mm_flags;
        struct elf_note_info info;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;

        /*
         * One segment per vma, plus any architecture extras, plus one for
         * the gate vma when there is one.
         */
        segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
        segs += ELF_CORE_EXTRA_PHDRS;
#endif

        gate_vma = get_gate_vma(current);
        if (gate_vma != NULL)
                segs++;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         *
         * On failure `info' is handed to free_note_info() via the cleanup
         * label below.
         */
        if (!fill_note_info(elf, segs + 1, /* including notes section */
                            &info, signr, regs))
                goto cleanup;

        /* From here on the dump counts as attempted. */
        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        /* Switch to KERNEL_DS for the writes; restored at end_coredump. */
        fs = get_fs();
        set_fs(KERNEL_DS);

        /*
         * `offset' tracks the planned file layout; `foffset' is the file
         * position handed to the note writers and starts right after the
         * ELF header and the program header table.
         */
        DUMP_WRITE(elf, sizeof(*elf));
        offset += sizeof(*elf);                         /* Elf header */
        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                struct elf_phdr phdr;
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                fill_elf_note_phdr(&phdr, sz, offset);
                offset += sz;
                DUMP_WRITE(&phdr, sizeof(phdr));
        }

        /* Segment data begins at the next ELF_EXEC_PAGESIZE boundary. */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /*
         * We must use the same mm->flags while dumping core to avoid
         * inconsistency between the program headers and bodies, otherwise an
         * unusable core file can be generated.
         */
        mm_flags = current->mm->flags;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                /* Bytes stored in the file may be fewer than the vma spans. */
                phdr.p_filesz = vma_dump_size(vma, mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                /* Translate the vma protection bits into segment flags. */
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                DUMP_WRITE(&phdr, sizeof(phdr));
        }

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
        ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

        /* write out the notes section */
        if (!write_note_info(&info, file, &foffset))
                goto end_coredump;

        if (elf_coredump_extra_notes_write(file, &foffset))
                goto end_coredump;

        /* Align to page */
        DUMP_SEEK(dataoff - foffset);

        /*
         * Second pass over the vmas: emit the contents, one page at a time,
         * for the same range that was announced in the program headers.
         */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        struct vm_area_struct *tmp_vma;

                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
                                                &page, &tmp_vma) <= 0) {
                                /* No page obtained: skip over it in the file. */
                                DUMP_SEEK(PAGE_SIZE);
                        } else {
                                if (page == ZERO_PAGE(0)) {
                                        /* Zero page: seek instead of writing. */
                                        if (!dump_seek(file, PAGE_SIZE)) {
                                                page_cache_release(page);
                                                goto end_coredump;
                                        }
                                } else {
                                        void *kaddr;
                                        flush_cache_page(tmp_vma, addr,
                                                         page_to_pfn(page));
                                        kaddr = kmap(page);
                                        /*
                                         * Stop once the byte count passes
                                         * `limit' or the write fails; the
                                         * mapping and page reference are
                                         * dropped before leaving.
                                         */
                                        if ((size += PAGE_SIZE) > limit ||
                                            !dump_write(file, kaddr,
                                            PAGE_SIZE)) {
                                                kunmap(page);
                                                page_cache_release(page);
                                                goto end_coredump;
                                        }
                                        kunmap(page);
                                }
                                /* Drop the reference get_user_pages() took. */
                                page_cache_release(page);
                        }
                }
        }

#ifdef ELF_CORE_WRITE_EXTRA_DATA
        ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
        set_fs(fs);

cleanup:
        free_note_info(&info);
        kfree(elf);
out:
        return has_dumped;
}
2055
2056#endif          /* USE_ELF_CORE_DUMP */
2057
2058static int __init init_elf_binfmt(void)
2059{
2060        return register_binfmt(&elf_format);
2061}
2062
2063static void __exit exit_elf_binfmt(void)
2064{
2065        /* Remove the COFF and ELF loaders. */
2066        unregister_binfmt(&elf_format);
2067}
2068
/*
 * Hook the init and exit routines into the kernel: init_elf_binfmt() is
 * registered through core_initcall(), exit_elf_binfmt() through
 * module_exit(), and the module licence is declared as GPL.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2072
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.