/*
 *      fs/proc/kcore.c kernel ELF core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
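
/*
 * /proc/kcore presents a live ELF core image of the running kernel, so
 * standard tools can inspect kernel memory, e.g. "gdb vmlinux /proc/kcore".
 * Opening it requires CAP_SYS_RAWIO; see open_kcore() below.
 */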
  11
  12#include <linux/mm.h>
  13#include <linux/proc_fs.h>
  14#include <linux/user.h>
  15#include <linux/capability.h>
  16#include <linux/elf.h>
  17#include <linux/elfcore.h>
  18#include <linux/vmalloc.h>
  19#include <linux/highmem.h>
  20#include <linux/bootmem.h>
  21#include <linux/init.h>
  22#include <linux/slab.h>
  23#include <asm/uaccess.h>
  24#include <asm/io.h>
  25#include <linux/list.h>
  26#include <linux/ioport.h>
  27#include <linux/memory.h>
  28#include <asm/sections.h>
  29
#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

static struct proc_dir_entry *proc_root_kcore;

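/*
 * Translate between a kernel virtual address and its offset in the data
 * part of /proc/kcore.  The defaults assume the linear direct mapping at
 * PAGE_OFFSET; architectures with a different layout override these.
 */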
#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

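/*
 * kclist_head holds every memory region exported through /proc/kcore,
 * protected by kclist_lock.  kcore_need_update starts out set so that
 * the first open scans System RAM; the memory-hotplug notifier sets it
 * again whenever the layout changes.
 */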
static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}

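/*
 * Compute the apparent size of /proc/kcore: the page-aligned room for
 * the ELF headers and notes (returned via *elf_buflen) plus the data
 * offset just past the highest registered region.  The caller must hold
 * kclist_lock.
 */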
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen =   sizeof(struct elfhdr) +
                        (*nphdr + 2)*sizeof(struct elf_phdr) +
                        3 * ((sizeof(struct elf_note)) +
                             roundup(sizeof(CORE_STR), 4)) +
                        roundup(sizeof(struct elf_prstatus), 4) +
                        roundup(sizeof(struct elf_prpsinfo), 4) +
                        roundup(sizeof(struct task_struct), 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}

static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}

/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP entries with the passed list.
 * Old entries are moved to a local list under the write lock and freed
 * only after the lock is dropped, keeping the critical section short.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}

#ifdef CONFIG_HIGHMEM
/*
 * With HIGHMEM, only [0...max_low_pfn) is direct-mapped, and the holes
 * in low memory are small, so export that range as a single contiguous
 * region.  (HIGHMEM is special because part of memory is _invisible_
 * to the kernel's direct mapping.)
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Calculate the vmemmap address range backing the given System RAM
 * region and register it.  Returns 1 on success, 0 on allocation
 * failure (the caller then drops the RAM entry too).
 */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = ALIGN(end, PAGE_SIZE);
        /* overlap check (we page-aligned start and end above) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}
#endif

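/*
 * Callback for walk_system_ram_range(): build a KCORE_RAM entry for one
 * contiguous range of System RAM, clipped to the direct-mapped part of
 * the address space.  A nonzero return value stops the walk.
 */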
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: __va() may wrap around on 32-bit arches */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* Clip so the range cannot wrap past the top of the address
         * space (taken from the ppc32 code). */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* Clip to below VMALLOC_START when the vmalloc area sits above
         * the direct-map area. */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

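/*
 * Without HIGHMEM all of RAM is direct-mapped, so walk every System RAM
 * resource and register each range (plus its vmemmap, where present).
 */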
static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized, or stale after hotplug....update now */
        /* Find the highest pfn spanned by any node with memory. */
        end_pfn = 0;
        for_each_node_state(nid, N_HIGH_MEMORY) {
                unsigned long node_end;
                node_end  = NODE_DATA(nid)->node_start_pfn +
                        NODE_DATA(nid)->node_spanned_pages;
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* Scan System RAM in [0, end_pfn). */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */

/*****************************************************************************/
/*
 * determine the file size of an ELF note: header plus name and
 * descriptor, each padded to a 4-byte boundary
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */

/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
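/*
 * Resulting layout (dataoff is the page-aligned size of this header
 * area, i.e. elf_buflen in read_kcore()):
 *
 *   elfhdr, PT_NOTE phdr, one PT_LOAD phdr per kclist entry,
 *   PRSTATUS/PRPSINFO/TASKSTRUCT notes, zero padding up to dataoff,
 *   then memory contents at p_offset = kc_vaddr_to_offset(addr) + dataoff.
 */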
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = ELF_CORE_EFLAGS;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                phdr->p_paddr   = 0;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz  = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = CORE_STR;
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

        nhdr->p_filesz  += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = CORE_STR;
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data   = current;

        nhdr->p_filesz  += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);
} /* end elf_kcore_store_hdr() */

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
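/*
 * Offsets below elf_buflen are served from a freshly built header
 * buffer; past that, the offset is mapped back to a kernel virtual
 * address with kc_offset_to_vaddr() and served from the matching kclist
 * region: vread() for vmalloc/module addresses, copy_to_user() for the
 * direct map, and zero-fill for unlisted holes.
 */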
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char * elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* leave now if filled buffer already */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Check to see if our file offset matches with any of
         * the addresses in the elf_phdr on our list.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

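        /*
         * Walk the request one page (or less) at a time so that each
         * chunk is matched against the region list on its own.
         */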
        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char * elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /* elf_buf was zeroed, so the user buffer gets
                         * zero-filled wherever vread() could not read */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish a fault on the
                                 * source from a fault on the destination.
                                 * Zero the remainder and hope a fault on
                                 * the destination shows up again as
                                 * -EFAULT from clear_user().
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n,
                                                                n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}

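/*
 * On each open: require CAP_SYS_RAWIO, rescan System RAM if memory
 * hotplug marked the region list stale, and sync i_size with the
 * recomputed size so userspace sees a consistent file length.
 */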
static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}

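/* default_llseek allows seeking to any offset; read_kcore() zero-fills
 * offsets that no registered region backs. */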
static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
        .llseek         = default_llseek,
};

#ifdef CONFIG_MEMORY_HOTPLUG
/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}
#endif

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * On architectures that map the kernel text outside the direct-map
 * area, export it as a separate KCORE_TEXT entry.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * The modules area [MODULES_VADDR, MODULES_END) does not intersect the
 * vmalloc area, so export it as its own entry.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                printk(KERN_ERR "couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store text area if it's special */
        proc_kcore_text_init();
        /* Store vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store direct-map area from physical memory map */
        kcore_update_ram();
        hotplug_memory_notifier(kcore_callback, 0);

        return 0;
}
module_init(proc_kcore_init);
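
/*
 * Illustrative (untested) userspace sketch, assuming a 64-bit kernel
 * (Elf64 types): find the PT_LOAD segment covering a kernel virtual
 * address and pread() the bytes from its file offset.
 *
 *      int fd = open("/proc/kcore", O_RDONLY);
 *      Elf64_Ehdr eh;
 *      pread(fd, &eh, sizeof(eh), 0);
 *      Elf64_Phdr ph[eh.e_phnum];
 *      pread(fd, ph, sizeof(ph), eh.e_phoff);
 *      for (int i = 0; i < eh.e_phnum; i++)
 *              if (ph[i].p_type == PT_LOAD && vaddr >= ph[i].p_vaddr &&
 *                  vaddr - ph[i].p_vaddr < ph[i].p_memsz)
 *                      pread(fd, buf, len,
 *                            ph[i].p_offset + (vaddr - ph[i].p_vaddr));
 */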