linux/arch/x86/mm/dump_pagetables.c
<<
>>
Prefs
   /*
    * Debug helper to dump the current kernel pagetables of the system
    * so that we can see what the various memory ranges are set to.
    *
    * (C) Copyright 2008 Intel Corporation
    *
    * Author: Arjan van de Ven <arjan@linux.intel.com>
    *
    * This program is free software; you can redistribute it and/or
    * modify it under the terms of the GNU General Public License
    * as published by the Free Software Foundation; version 2
    * of the License.
    */
  14
  15#include <linux/debugfs.h>
  16#include <linux/mm.h>
  17#include <linux/module.h>
  18#include <linux/seq_file.h>
  19
  20#include <asm/pgtable.h>
  21
  22/*
  23 * The dumper groups pagetable entries of the same type into one, and for
  24 * that it needs to keep some state when walking, and flush this state
  25 * when a "break" in the continuity is found.
  26 */
  27struct pg_state {
  28        int level;
  29        pgprot_t current_prot;
  30        unsigned long start_address;
  31        unsigned long current_address;
  32        const struct addr_marker *marker;
  33};
  34
  35struct addr_marker {
  36        unsigned long start_address;
  37        const char *name;
  38};
  39
  40/* Address space markers hints */
  41static struct addr_marker address_markers[] = {
  42        { 0, "User Space" },
  43#ifdef CONFIG_X86_64
  44        { 0x8000000000000000UL, "Kernel Space" },
  45        { PAGE_OFFSET,          "Low Kernel Mapping" },
  46        { VMALLOC_START,        "vmalloc() Area" },
  47        { VMEMMAP_START,        "Vmemmap" },
  48        { __START_KERNEL_map,   "High Kernel Mapping" },
  49        { MODULES_VADDR,        "Modules" },
  50        { MODULES_END,          "End Modules" },
  51#else
  52        { PAGE_OFFSET,          "Kernel Mapping" },
  53        { 0/* VMALLOC_START */, "vmalloc() Area" },
  54        { 0/*VMALLOC_END*/,     "vmalloc() End" },
  55# ifdef CONFIG_HIGHMEM
  56        { 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
  57# endif
  58        { 0/*FIXADDR_START*/,   "Fixmap Area" },
  59#endif
  60        { -1, NULL }            /* End of list */
  61};
  62
  63/* Multipliers for offsets within the PTEs */
  64#define PTE_LEVEL_MULT (PAGE_SIZE)
  65#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
  66#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
  67#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
  68
  69/*
  70 * Print a readable form of a pgprot_t to the seq_file
  71 */
  72static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
  73{
  74        pgprotval_t pr = pgprot_val(prot);
  75        static const char * const level_name[] =
  76                { "cr3", "pgd", "pud", "pmd", "pte" };
  77
  78        if (!pgprot_val(prot)) {
  79                /* Not present */
  80                seq_printf(m, "                          ");
  81        } else {
  82                if (pr & _PAGE_USER)
  83                        seq_printf(m, "USR ");
  84                else
  85                        seq_printf(m, "    ");
  86                if (pr & _PAGE_RW)
  87                        seq_printf(m, "RW ");
  88                else
  89                        seq_printf(m, "ro ");
  90                if (pr & _PAGE_PWT)
  91                        seq_printf(m, "PWT ");
  92                else
  93                        seq_printf(m, "    ");
  94                if (pr & _PAGE_PCD)
  95                        seq_printf(m, "PCD ");
  96                else
  97                        seq_printf(m, "    ");
  98
  99                /* Bit 9 has a different meaning on level 3 vs 4 */
 100                if (level <= 3) {
 101                        if (pr & _PAGE_PSE)
 102                                seq_printf(m, "PSE ");
 103                        else
 104                                seq_printf(m, "    ");
 105                } else {
 106                        if (pr & _PAGE_PAT)
 107                                seq_printf(m, "pat ");
 108                        else
 109                                seq_printf(m, "    ");
 110                }
 111                if (pr & _PAGE_GLOBAL)
 112                        seq_printf(m, "GLB ");
 113                else
 114                        seq_printf(m, "    ");
 115                if (pr & _PAGE_NX)
 116                        seq_printf(m, "NX ");
 117                else
 118                        seq_printf(m, "x  ");
 119        }
 120        seq_printf(m, "%s\n", level_name[level]);
 121}
 122
 123/*
 124 * On 64 bits, sign-extend the 48 bit address to 64 bit
 125 */
 126static unsigned long normalize_addr(unsigned long u)
 127{
 128#ifdef CONFIG_X86_64
 129        return (signed long)(u << 16) >> 16;
 130#else
 131        return u;
 132#endif
 133}
 134
 135/*
 136 * This function gets called on a break in a continuous series
 137 * of PTE entries; the next one is different so we need to
 138 * print what we collected so far.
 139 */
 140static void note_page(struct seq_file *m, struct pg_state *st,
 141                      pgprot_t new_prot, int level)
 142{
 143        pgprotval_t prot, cur;
 144        static const char units[] = "KMGTPE";
 145
 146        /*
 147         * If we have a "break" in the series, we need to flush the state that
 148         * we have now. "break" is either changing perms, levels or
 149         * address space marker.
 150         */
 151        prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
 152        cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 153
 154        if (!st->level) {
 155                /* First entry */
 156                st->current_prot = new_prot;
 157                st->level = level;
 158                st->marker = address_markers;
 159                seq_printf(m, "---[ %s ]---\n", st->marker->name);
 160        } else if (prot != cur || level != st->level ||
 161                   st->current_address >= st->marker[1].start_address) {
 162                const char *unit = units;
 163                unsigned long delta;
 164                int width = sizeof(unsigned long) * 2;
 165
 166                /*
 167                 * Now print the actual finished series
 168                 */
 169                seq_printf(m, "0x%0*lx-0x%0*lx   ",
 170                           width, st->start_address,
 171                           width, st->current_address);
 172
 173                delta = (st->current_address - st->start_address) >> 10;
 174                while (!(delta & 1023) && unit[1]) {
 175                        delta >>= 10;
 176                        unit++;
 177                }
 178                seq_printf(m, "%9lu%c ", delta, *unit);
 179                printk_prot(m, st->current_prot, st->level);
 180
 181                /*
 182                 * We print markers for special areas of address space,
 183                 * such as the start of vmalloc space etc.
 184                 * This helps in the interpretation.
 185                 */
 186                if (st->current_address >= st->marker[1].start_address) {
 187                        st->marker++;
 188                        seq_printf(m, "---[ %s ]---\n", st->marker->name);
 189                }
 190
 191                st->start_address = st->current_address;
 192                st->current_prot = new_prot;
 193                st->level = level;
 194        }
 195}
 196
 197static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 198                                                        unsigned long P)
 199{
 200        int i;
 201        pte_t *start;
 202
 203        start = (pte_t *) pmd_page_vaddr(addr);
 204        for (i = 0; i < PTRS_PER_PTE; i++) {
 205                pgprot_t prot = pte_pgprot(*start);
 206
 207                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
 208                note_page(m, st, prot, 4);
 209                start++;
 210        }
 211}
 212
 213#if PTRS_PER_PMD > 1
 214
 215static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 216                                                        unsigned long P)
 217{
 218        int i;
 219        pmd_t *start;
 220
 221        start = (pmd_t *) pud_page_vaddr(addr);
 222        for (i = 0; i < PTRS_PER_PMD; i++) {
 223                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 224                if (!pmd_none(*start)) {
 225                        pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
 226
 227                        if (pmd_large(*start) || !pmd_present(*start))
 228                                note_page(m, st, __pgprot(prot), 3);
 229                        else
 230                                walk_pte_level(m, st, *start,
 231                                               P + i * PMD_LEVEL_MULT);
 232                } else
 233                        note_page(m, st, __pgprot(0), 3);
 234                start++;
 235        }
 236}
 237
 238#else
 239#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
 240#define pud_large(a) pmd_large(__pmd(pud_val(a)))
 241#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
 242#endif
 243
 244#if PTRS_PER_PUD > 1
 245
 246static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 247                                                        unsigned long P)
 248{
 249        int i;
 250        pud_t *start;
 251
 252        start = (pud_t *) pgd_page_vaddr(addr);
 253
 254        for (i = 0; i < PTRS_PER_PUD; i++) {
 255                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 256                if (!pud_none(*start)) {
 257                        pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
 258
 259                        if (pud_large(*start) || !pud_present(*start))
 260                                note_page(m, st, __pgprot(prot), 2);
 261                        else
 262                                walk_pmd_level(m, st, *start,
 263                                               P + i * PUD_LEVEL_MULT);
 264                } else
 265                        note_page(m, st, __pgprot(0), 2);
 266
 267                start++;
 268        }
 269}
 270
 271#else
 272#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
 273#define pgd_large(a) pud_large(__pud(pgd_val(a)))
 274#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 275#endif
 276
 277static void walk_pgd_level(struct seq_file *m)
 278{
 279#ifdef CONFIG_X86_64
 280        pgd_t *start = (pgd_t *) &init_level4_pgt;
 281#else
 282        pgd_t *start = swapper_pg_dir;
 283#endif
 284        int i;
 285        struct pg_state st;
 286
 287        memset(&st, 0, sizeof(st));
 288
 289        for (i = 0; i < PTRS_PER_PGD; i++) {
 290                st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
 291                if (!pgd_none(*start)) {
 292                        pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
 293
 294                        if (pgd_large(*start) || !pgd_present(*start))
 295                                note_page(m, &st, __pgprot(prot), 1);
 296                        else
 297                                walk_pud_level(m, &st, *start,
 298                                               i * PGD_LEVEL_MULT);
 299                } else
 300                        note_page(m, &st, __pgprot(0), 1);
 301
 302                start++;
 303        }
 304
 305        /* Flush out the last page */
 306        st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
 307        note_page(m, &st, __pgprot(0), 0);
 308}
 309
 310static int ptdump_show(struct seq_file *m, void *v)
 311{
 312        walk_pgd_level(m);
 313        return 0;
 314}
 315
 316static int ptdump_open(struct inode *inode, struct file *filp)
 317{
 318        return single_open(filp, ptdump_show, NULL);
 319}
 320
 321static const struct file_operations ptdump_fops = {
 322        .open           = ptdump_open,
 323        .read           = seq_read,
 324        .llseek         = seq_lseek,
 325        .release        = single_release,
 326};
 327
 328static int pt_dump_init(void)
 329{
 330        struct dentry *pe;
 331
 332#ifdef CONFIG_X86_32
 333        /* Not a compile-time constant on x86-32 */
 334        address_markers[2].start_address = VMALLOC_START;
 335        address_markers[3].start_address = VMALLOC_END;
 336# ifdef CONFIG_HIGHMEM
 337        address_markers[4].start_address = PKMAP_BASE;
 338        address_markers[5].start_address = FIXADDR_START;
 339# else
 340        address_markers[4].start_address = FIXADDR_START;
 341# endif
 342#endif
 343
 344        pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
 345                                 &ptdump_fops);
 346        if (!pe)
 347                return -ENOMEM;
 348
 349        return 0;
 350}
 351
 352__initcall(pt_dump_init);
 353MODULE_LICENSE("GPL");
 354MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
 355MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
 356
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.