linux/fs/proc/kcore.c History
<<
>>
Prefs
   1/*
   2 *      fs/proc/kcore.c kernel ELF core dumper
   3 *
   4 *      Modelled on fs/exec.c:aout_core_dump()
   5 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
   6 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
   7 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
   8 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
   9 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
  10 */
  11
  12#include <linux/mm.h>
  13#include <linux/proc_fs.h>
  14#include <linux/user.h>
  15#include <linux/capability.h>
  16#include <linux/elf.h>
  17#include <linux/elfcore.h>
  18#include <linux/vmalloc.h>
  19#include <linux/highmem.h>
  20#include <linux/bootmem.h>
  21#include <linux/init.h>
  22#include <asm/uaccess.h>
  23#include <asm/io.h>
  24#include <linux/list.h>
  25#include <linux/ioport.h>
  26#include <linux/memory.h>
  27#include <asm/sections.h>
  28
  29#define CORE_STR "CORE"
  30
  31#ifndef ELF_CORE_EFLAGS
  32#define ELF_CORE_EFLAGS 0
  33#endif
  34
  35static struct proc_dir_entry *proc_root_kcore;
  36
  37
  38#ifndef kc_vaddr_to_offset
  39#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
  40#endif
  41#ifndef kc_offset_to_vaddr
  42#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
  43#endif
  44
  45/* An ELF note in memory */
  46struct memelfnote
  47{
  48        const char *name;
  49        int type;
  50        unsigned int datasz;
  51        void *data;
  52};
  53
  54static LIST_HEAD(kclist_head);
  55static DEFINE_RWLOCK(kclist_lock);
  56static int kcore_need_update = 1;
  57
  58void
  59kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
  60{
  61        new->addr = (unsigned long)addr;
  62        new->size = size;
  63        new->type = type;
  64
  65        write_lock(&kclist_lock);
  66        list_add_tail(&new->list, &kclist_head);
  67        write_unlock(&kclist_lock);
  68}
  69
  70static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
  71{
  72        size_t try, size;
  73        struct kcore_list *m;
  74
  75        *nphdr = 1; /* PT_NOTE */
  76        size = 0;
  77
  78        list_for_each_entry(m, &kclist_head, list) {
  79                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
  80                if (try > size)
  81                        size = try;
  82                *nphdr = *nphdr + 1;
  83        }
  84        *elf_buflen =   sizeof(struct elfhdr) + 
  85                        (*nphdr + 2)*sizeof(struct elf_phdr) + 
  86                        3 * ((sizeof(struct elf_note)) +
  87                             roundup(sizeof(CORE_STR), 4)) +
  88                        roundup(sizeof(struct elf_prstatus), 4) +
  89                        roundup(sizeof(struct elf_prpsinfo), 4) +
  90                        roundup(sizeof(struct task_struct), 4);
  91        *elf_buflen = PAGE_ALIGN(*elf_buflen);
  92        return size + *elf_buflen;
  93}
  94
  95static void free_kclist_ents(struct list_head *head)
  96{
  97        struct kcore_list *tmp, *pos;
  98
  99        list_for_each_entry_safe(pos, tmp, head, list) {
 100                list_del(&pos->list);
 101                kfree(pos);
 102        }
 103}
 104/*
 105 * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
 106 */
 107static void __kcore_update_ram(struct list_head *list)
 108{
 109        int nphdr;
 110        size_t size;
 111        struct kcore_list *tmp, *pos;
 112        LIST_HEAD(garbage);
 113
 114        write_lock(&kclist_lock);
 115        if (kcore_need_update) {
 116                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
 117                        if (pos->type == KCORE_RAM
 118                                || pos->type == KCORE_VMEMMAP)
 119                                list_move(&pos->list, &garbage);
 120                }
 121                list_splice_tail(list, &kclist_head);
 122        } else
 123                list_splice(list, &garbage);
 124        kcore_need_update = 0;
 125        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
 126        write_unlock(&kclist_lock);
 127
 128        free_kclist_ents(&garbage);
 129}
 130
 131
 132#ifdef CONFIG_HIGHMEM
 133/*
 134 * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
 135 * because memory hole is not as big as !HIGHMEM case.
 136 * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
 137 */
 138static int kcore_update_ram(void)
 139{
 140        LIST_HEAD(head);
 141        struct kcore_list *ent;
 142        int ret = 0;
 143
 144        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 145        if (!ent)
 146                return -ENOMEM;
 147        ent->addr = (unsigned long)__va(0);
 148        ent->size = max_low_pfn << PAGE_SHIFT;
 149        ent->type = KCORE_RAM;
 150        list_add(&ent->list, &head);
 151        __kcore_update_ram(&head);
 152        return ret;
 153}
 154
 155#else /* !CONFIG_HIGHMEM */
 156
 157#ifdef CONFIG_SPARSEMEM_VMEMMAP
 158/* calculate vmemmap's address from given system ram pfn and register it */
 159int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
 160{
 161        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
 162        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
 163        unsigned long start, end;
 164        struct kcore_list *vmm, *tmp;
 165
 166
 167        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
 168        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
 169        end = ALIGN(end, PAGE_SIZE);
 170        /* overlap check (because we have to align page */
 171        list_for_each_entry(tmp, head, list) {
 172                if (tmp->type != KCORE_VMEMMAP)
 173                        continue;
 174                if (start < tmp->addr + tmp->size)
 175                        if (end > tmp->addr)
 176                                end = tmp->addr;
 177        }
 178        if (start < end) {
 179                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
 180                if (!vmm)
 181                        return 0;
 182                vmm->addr = start;
 183                vmm->size = end - start;
 184                vmm->type = KCORE_VMEMMAP;
 185                list_add_tail(&vmm->list, head);
 186        }
 187        return 1;
 188
 189}
 190#else
 191int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
 192{
 193        return 1;
 194}
 195
 196#endif
 197
 198static int
 199kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 200{
 201        struct list_head *head = (struct list_head *)arg;
 202        struct kcore_list *ent;
 203
 204        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 205        if (!ent)
 206                return -ENOMEM;
 207        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
 208        ent->size = nr_pages << PAGE_SHIFT;
 209
 210        /* Sanity check: Can happen in 32bit arch...maybe */
 211        if (ent->addr < (unsigned long) __va(0))
 212                goto free_out;
 213
 214        /* cut not-mapped area. ....from ppc-32 code. */
 215        if (ULONG_MAX - ent->addr < ent->size)
 216                ent->size = ULONG_MAX - ent->addr;
 217
 218        /* cut when vmalloc() area is higher than direct-map area */
 219        if (VMALLOC_START > (unsigned long)__va(0)) {
 220                if (ent->addr > VMALLOC_START)
 221                        goto free_out;
 222                if (VMALLOC_START - ent->addr < ent->size)
 223                        ent->size = VMALLOC_START - ent->addr;
 224        }
 225
 226        ent->type = KCORE_RAM;
 227        list_add_tail(&ent->list, head);
 228
 229        if (!get_sparsemem_vmemmap_info(ent, head)) {
 230                list_del(&ent->list);
 231                goto free_out;
 232        }
 233
 234        return 0;
 235free_out:
 236        kfree(ent);
 237        return 1;
 238}
 239
 240static int kcore_update_ram(void)
 241{
 242        int nid, ret;
 243        unsigned long end_pfn;
 244        LIST_HEAD(head);
 245
 246        /* Not inialized....update now */
 247        /* find out "max pfn" */
 248        end_pfn = 0;
 249        for_each_node_state(nid, N_HIGH_MEMORY) {
 250                unsigned long node_end;
 251                node_end  = NODE_DATA(nid)->node_start_pfn +
 252                        NODE_DATA(nid)->node_spanned_pages;
 253                if (end_pfn < node_end)
 254                        end_pfn = node_end;
 255        }
 256        /* scan 0 to max_pfn */
 257        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
 258        if (ret) {
 259                free_kclist_ents(&head);
 260                return -ENOMEM;
 261        }
 262        __kcore_update_ram(&head);
 263        return ret;
 264}
 265#endif /* CONFIG_HIGHMEM */
 266
 267/*****************************************************************************/
 268/*
 269 * determine size of ELF note
 270 */
 271static int notesize(struct memelfnote *en)
 272{
 273        int sz;
 274
 275        sz = sizeof(struct elf_note);
 276        sz += roundup((strlen(en->name) + 1), 4);
 277        sz += roundup(en->datasz, 4);
 278
 279        return sz;
 280} /* end notesize() */
 281
 282/*****************************************************************************/
 283/*
 284 * store a note in the header buffer
 285 */
 286static char *storenote(struct memelfnote *men, char *bufp)
 287{
 288        struct elf_note en;
 289
 290#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
 291
 292        en.n_namesz = strlen(men->name) + 1;
 293        en.n_descsz = men->datasz;
 294        en.n_type = men->type;
 295
 296        DUMP_WRITE(&en, sizeof(en));
 297        DUMP_WRITE(men->name, en.n_namesz);
 298
 299        /* XXX - cast from long long to long to avoid need for libgcc.a */
 300        bufp = (char*) roundup((unsigned long)bufp,4);
 301        DUMP_WRITE(men->data, men->datasz);
 302        bufp = (char*) roundup((unsigned long)bufp,4);
 303
 304#undef DUMP_WRITE
 305
 306        return bufp;
 307} /* end storenote() */
 308
 309/*
 310 * store an ELF coredump header in the supplied buffer
 311 * nphdr is the number of elf_phdr to insert
 312 */
 313static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 314{
 315        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
 316        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
 317        struct elf_phdr *nhdr, *phdr;
 318        struct elfhdr *elf;
 319        struct memelfnote notes[3];
 320        off_t offset = 0;
 321        struct kcore_list *m;
 322
 323        /* setup ELF header */
 324        elf = (struct elfhdr *) bufp;
 325        bufp += sizeof(struct elfhdr);
 326        offset += sizeof(struct elfhdr);
 327        memcpy(elf->e_ident, ELFMAG, SELFMAG);
 328        elf->e_ident[EI_CLASS]  = ELF_CLASS;
 329        elf->e_ident[EI_DATA]   = ELF_DATA;
 330        elf->e_ident[EI_VERSION]= EV_CURRENT;
 331        elf->e_ident[EI_OSABI] = ELF_OSABI;
 332        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
 333        elf->e_type     = ET_CORE;
 334        elf->e_machine  = ELF_ARCH;
 335        elf->e_version  = EV_CURRENT;
 336        elf->e_entry    = 0;
 337        elf->e_phoff    = sizeof(struct elfhdr);
 338        elf->e_shoff    = 0;
 339        elf->e_flags    = ELF_CORE_EFLAGS;
 340        elf->e_ehsize   = sizeof(struct elfhdr);
 341        elf->e_phentsize= sizeof(struct elf_phdr);
 342        elf->e_phnum    = nphdr;
 343        elf->e_shentsize= 0;
 344        elf->e_shnum    = 0;
 345        elf->e_shstrndx = 0;
 346
 347        /* setup ELF PT_NOTE program header */
 348        nhdr = (struct elf_phdr *) bufp;
 349        bufp += sizeof(struct elf_phdr);
 350        offset += sizeof(struct elf_phdr);
 351        nhdr->p_type    = PT_NOTE;
 352        nhdr->p_offset  = 0;
 353        nhdr->p_vaddr   = 0;
 354        nhdr->p_paddr   = 0;
 355        nhdr->p_filesz  = 0;
 356        nhdr->p_memsz   = 0;
 357        nhdr->p_flags   = 0;
 358        nhdr->p_align   = 0;
 359
 360        /* setup ELF PT_LOAD program header for every area */
 361        list_for_each_entry(m, &kclist_head, list) {
 362                phdr = (struct elf_phdr *) bufp;
 363                bufp += sizeof(struct elf_phdr);
 364                offset += sizeof(struct elf_phdr);
 365
 366                phdr->p_type    = PT_LOAD;
 367                phdr->p_flags   = PF_R|PF_W|PF_X;
 368                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
 369                phdr->p_vaddr   = (size_t)m->addr;
 370                phdr->p_paddr   = 0;
 371                phdr->p_filesz  = phdr->p_memsz = m->size;
 372                phdr->p_align   = PAGE_SIZE;
 373        }
 374
 375        /*
 376         * Set up the notes in similar form to SVR4 core dumps made
 377         * with info from their /proc.
 378         */
 379        nhdr->p_offset  = offset;
 380
 381        /* set up the process status */
 382        notes[0].name = CORE_STR;
 383        notes[0].type = NT_PRSTATUS;
 384        notes[0].datasz = sizeof(struct elf_prstatus);
 385        notes[0].data = &prstatus;
 386
 387        memset(&prstatus, 0, sizeof(struct elf_prstatus));
 388
 389        nhdr->p_filesz  = notesize(&notes[0]);
 390        bufp = storenote(&notes[0], bufp);
 391
 392        /* set up the process info */
 393        notes[1].name   = CORE_STR;
 394        notes[1].type   = NT_PRPSINFO;
 395        notes[1].datasz = sizeof(struct elf_prpsinfo);
 396        notes[1].data   = &prpsinfo;
 397
 398        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
 399        prpsinfo.pr_state       = 0;
 400        prpsinfo.pr_sname       = 'R';
 401        prpsinfo.pr_zomb        = 0;
 402
 403        strcpy(prpsinfo.pr_fname, "vmlinux");
 404        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);
 405
 406        nhdr->p_filesz  += notesize(&notes[1]);
 407        bufp = storenote(&notes[1], bufp);
 408
 409        /* set up the task structure */
 410        notes[2].name   = CORE_STR;
 411        notes[2].type   = NT_TASKSTRUCT;
 412        notes[2].datasz = sizeof(struct task_struct);
 413        notes[2].data   = current;
 414
 415        nhdr->p_filesz  += notesize(&notes[2]);
 416        bufp = storenote(&notes[2], bufp);
 417
 418} /* end elf_kcore_store_hdr() */
 419
 420/*****************************************************************************/
 421/*
 422 * read from the ELF header and then kernel memory
 423 */
 424static ssize_t
 425read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 426{
 427        ssize_t acc = 0;
 428        size_t size, tsz;
 429        size_t elf_buflen;
 430        int nphdr;
 431        unsigned long start;
 432
 433        read_lock(&kclist_lock);
 434        size = get_kcore_size(&nphdr, &elf_buflen);
 435
 436        if (buflen == 0 || *fpos >= size) {
 437                read_unlock(&kclist_lock);
 438                return 0;
 439        }
 440
 441        /* trim buflen to not go beyond EOF */
 442        if (buflen > size - *fpos)
 443                buflen = size - *fpos;
 444
 445        /* construct an ELF core header if we'll need some of it */
 446        if (*fpos < elf_buflen) {
 447                char * elf_buf;
 448
 449                tsz = elf_buflen - *fpos;
 450                if (buflen < tsz)
 451                        tsz = buflen;
 452                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
 453                if (!elf_buf) {
 454                        read_unlock(&kclist_lock);
 455                        return -ENOMEM;
 456                }
 457                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
 458                read_unlock(&kclist_lock);
 459                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
 460                        kfree(elf_buf);
 461                        return -EFAULT;
 462                }
 463                kfree(elf_buf);
 464                buflen -= tsz;
 465                *fpos += tsz;
 466                buffer += tsz;
 467                acc += tsz;
 468
 469                /* leave now if filled buffer already */
 470                if (buflen == 0)
 471                        return acc;
 472        } else
 473                read_unlock(&kclist_lock);
 474
 475        /*
 476         * Check to see if our file offset matches with any of
 477         * the addresses in the elf_phdr on our list.
 478         */
 479        start = kc_offset_to_vaddr(*fpos - elf_buflen);
 480        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
 481                tsz = buflen;
 482                
 483        while (buflen) {
 484                struct kcore_list *m;
 485
 486                read_lock(&kclist_lock);
 487                list_for_each_entry(m, &kclist_head, list) {
 488                        if (start >= m->addr && start < (m->addr+m->size))
 489                                break;
 490                }
 491                read_unlock(&kclist_lock);
 492
 493                if (m == NULL) {
 494                        if (clear_user(buffer, tsz))
 495                                return -EFAULT;
 496                } else if (is_vmalloc_or_module_addr((void *)start)) {
 497                        char * elf_buf;
 498
 499                        elf_buf = kzalloc(tsz, GFP_KERNEL);
 500                        if (!elf_buf)
 501                                return -ENOMEM;
 502                        vread(elf_buf, (char *)start, tsz);
 503                        /* we have to zero-fill user buffer even if no read */
 504                        if (copy_to_user(buffer, elf_buf, tsz)) {
 505                                kfree(elf_buf);
 506                                return -EFAULT;
 507                        }
 508                        kfree(elf_buf);
 509                } else {
 510                        if (kern_addr_valid(start)) {
 511                                unsigned long n;
 512
 513                                n = copy_to_user(buffer, (char *)start, tsz);
 514                                /*
 515                                 * We cannot distingush between fault on source
 516                                 * and fault on destination. When this happens
 517                                 * we clear too and hope it will trigger the
 518                                 * EFAULT again.
 519                                 */
 520                                if (n) { 
 521                                        if (clear_user(buffer + tsz - n,
 522                                                                n))
 523                                                return -EFAULT;
 524                                }
 525                        } else {
 526                                if (clear_user(buffer, tsz))
 527                                        return -EFAULT;
 528                        }
 529                }
 530                buflen -= tsz;
 531                *fpos += tsz;
 532                buffer += tsz;
 533                acc += tsz;
 534                start += tsz;
 535                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
 536        }
 537
 538        return acc;
 539}
 540
 541
 542static int open_kcore(struct inode *inode, struct file *filp)
 543{
 544        if (!capable(CAP_SYS_RAWIO))
 545                return -EPERM;
 546        if (kcore_need_update)
 547                kcore_update_ram();
 548        if (i_size_read(inode) != proc_root_kcore->size) {
 549                mutex_lock(&inode->i_mutex);
 550                i_size_write(inode, proc_root_kcore->size);
 551                mutex_unlock(&inode->i_mutex);
 552        }
 553        return 0;
 554}
 555
 556
 557static const struct file_operations proc_kcore_operations = {
 558        .read           = read_kcore,
 559        .open           = open_kcore,
 560};
 561
 562#ifdef CONFIG_MEMORY_HOTPLUG
 563/* just remember that we have to update kcore */
 564static int __meminit kcore_callback(struct notifier_block *self,
 565                                    unsigned long action, void *arg)
 566{
 567        switch (action) {
 568        case MEM_ONLINE:
 569        case MEM_OFFLINE:
 570                write_lock(&kclist_lock);
 571                kcore_need_update = 1;
 572                write_unlock(&kclist_lock);
 573        }
 574        return NOTIFY_OK;
 575}
 576#endif
 577
 578
 579static struct kcore_list kcore_vmalloc;
 580
 581#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
 582static struct kcore_list kcore_text;
 583/*
 584 * If defined, special segment is used for mapping kernel text instead of
 585 * direct-map area. We need to create special TEXT section.
 586 */
 587static void __init proc_kcore_text_init(void)
 588{
 589        kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
 590}
 591#else
 592static void __init proc_kcore_text_init(void)
 593{
 594}
 595#endif
 596
 597#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
 598/*
 599 * MODULES_VADDR has no intersection with VMALLOC_ADDR.
 600 */
 601struct kcore_list kcore_modules;
 602static void __init add_modules_range(void)
 603{
 604        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
 605                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
 606}
 607#else
 608static void __init add_modules_range(void)
 609{
 610}
 611#endif
 612
 613static int __init proc_kcore_init(void)
 614{
 615        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
 616                                      &proc_kcore_operations);
 617        if (!proc_root_kcore) {
 618                printk(KERN_ERR "couldn't create /proc/kcore\n");
 619                return 0; /* Always returns 0. */
 620        }
 621        /* Store text area if it's special */
 622        proc_kcore_text_init();
 623        /* Store vmalloc area */
 624        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 625                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
 626        add_modules_range();
 627        /* Store direct-map area from physical memory map */
 628        kcore_update_ram();
 629        hotplug_memory_notifier(kcore_callback, 0);
 630
 631        return 0;
 632}
 633module_init(proc_kcore_init);
 634
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.