linux/arch/x86/mm/dump_pagetables.c
<<
>>
Prefs
   1/*
   2 * Debug helper to dump the current kernel pagetables of the system
   3 * so that we can see what the various memory ranges are set to.
   4 *
   5 * (C) Copyright 2008 Intel Corporation
   6 *
   7 * Author: Arjan van de Ven <arjan@linux.intel.com>
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License
  11 * as published by the Free Software Foundation; version 2
  12 * of the License.
  13 */
  14
  15#include <linux/debugfs.h>
  16#include <linux/mm.h>
  17#include <linux/module.h>
  18#include <linux/seq_file.h>
  19
  20#include <asm/pgtable.h>
  21
/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 *
 * The structure is zero-filled before a walk starts; note_page() treats
 * level == 0 as "no entry has been seen yet".
 */
struct pg_state {
        /* Page-table level of the run being collected (0 = nothing yet) */
        int level;
        /* Protection bits shared by every entry of the current run */
        pgprot_t current_prot;
        /* Virtual address at which the current run begins */
        unsigned long start_address;
        /* Virtual address of the entry currently being examined */
        unsigned long current_address;
        /* Last address-space marker that has been printed */
        const struct addr_marker *marker;
};
  34
/*
 * A named boundary in the virtual address space.  The dumper prints
 * "---[ name ]---" once the walk reaches start_address.
 */
struct addr_marker {
        /* First virtual address belonging to the region */
        unsigned long start_address;
        /* Label printed for the region; NULL in the list terminator */
        const char *name;
};
  39
  40/* Address space markers hints */
  41static struct addr_marker address_markers[] = {
  42        { 0, "User Space" },
  43#ifdef CONFIG_X86_64
  44        { 0x8000000000000000UL, "Kernel Space" },
  45        { PAGE_OFFSET,          "Low Kernel Mapping" },
  46        { VMALLOC_START,        "vmalloc() Area" },
  47        { VMEMMAP_START,        "Vmemmap" },
  48        { __START_KERNEL_map,   "High Kernel Mapping" },
  49        { MODULES_VADDR,        "Modules" },
  50        { MODULES_END,          "End Modules" },
  51#else
  52        { PAGE_OFFSET,          "Kernel Mapping" },
  53        { 0/* VMALLOC_START */, "vmalloc() Area" },
  54        { 0/*VMALLOC_END*/,     "vmalloc() End" },
  55# ifdef CONFIG_HIGHMEM
  56        { 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
  57# endif
  58        { 0/*FIXADDR_START*/,   "Fixmap Area" },
  59#endif
  60        { -1, NULL }            /* End of list */
  61};
  62
/*
 * Multipliers for offsets within the PTEs.
 *
 * Each value is the amount of virtual address space covered by a single
 * entry at that level; the walkers compute an entry's address as
 * base + index * <LEVEL>_LEVEL_MULT.
 */
/* One PTE entry covers one page */
#define PTE_LEVEL_MULT (PAGE_SIZE)
/* One PMD entry covers a full table of PTEs */
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
/* One PUD entry covers a full table of PMDs */
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
/* One PGD entry covers a full table of PUDs */
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
  68
  69/*
  70 * Print a readable form of a pgprot_t to the seq_file
  71 */
  72static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
  73{
  74        pgprotval_t pr = pgprot_val(prot);
  75        static const char * const level_name[] =
  76                { "cr3", "pgd", "pud", "pmd", "pte" };
  77
  78        if (!pgprot_val(prot)) {
  79                /* Not present */
  80                seq_printf(m, "                          ");
  81        } else {
  82                if (pr & _PAGE_USER)
  83                        seq_printf(m, "USR ");
  84                else
  85                        seq_printf(m, "    ");
  86                if (pr & _PAGE_RW)
  87                        seq_printf(m, "RW ");
  88                else
  89                        seq_printf(m, "ro ");
  90                if (pr & _PAGE_PWT)
  91                        seq_printf(m, "PWT ");
  92                else
  93                        seq_printf(m, "    ");
  94                if (pr & _PAGE_PCD)
  95                        seq_printf(m, "PCD ");
  96                else
  97                        seq_printf(m, "    ");
  98
  99                /* Bit 9 has a different meaning on level 3 vs 4 */
 100                if (level <= 3) {
 101                        if (pr & _PAGE_PSE)
 102                                seq_printf(m, "PSE ");
 103                        else
 104                                seq_printf(m, "    ");
 105                } else {
 106                        if (pr & _PAGE_PAT)
 107                                seq_printf(m, "pat ");
 108                        else
 109                                seq_printf(m, "    ");
 110                }
 111                if (pr & _PAGE_GLOBAL)
 112                        seq_printf(m, "GLB ");
 113                else
 114                        seq_printf(m, "    ");
 115                if (pr & _PAGE_NX)
 116                        seq_printf(m, "NX ");
 117                else
 118                        seq_printf(m, "x  ");
 119        }
 120        seq_printf(m, "%s\n", level_name[level]);
 121}
 122
 123/*
 124 * On 64 bits, sign-extend the 48 bit address to 64 bit
 125 */
 126static unsigned long normalize_addr(unsigned long u)
 127{
 128#ifdef CONFIG_X86_64
 129        return (signed long)(u << 16) >> 16;
 130#else
 131        return u;
 132#endif
 133}
 134
 135/*
 136 * This function gets called on a break in a continuous series
 137 * of PTE entries; the next one is different so we need to
 138 * print what we collected so far.
 139 */
 140static void note_page(struct seq_file *m, struct pg_state *st,
 141                      pgprot_t new_prot, int level)
 142{
 143        pgprotval_t prot, cur;
 144        static const char units[] = "KMGTPE";
 145
 146        /*
 147         * If we have a "break" in the series, we need to flush the state that
 148         * we have now. "break" is either changing perms, levels or
 149         * address space marker.
 150         */
 151        prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
 152        cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 153
 154        if (!st->level) {
 155                /* First entry */
 156                st->current_prot = new_prot;
 157                st->level = level;
 158                st->marker = address_markers;
 159                seq_printf(m, "---[ %s ]---\n", st->marker->name);
 160        } else if (prot != cur || level != st->level ||
 161                   st->current_address >= st->marker[1].start_address) {
 162                const char *unit = units;
 163                unsigned long delta;
 164
 165                /*
 166                 * Now print the actual finished series
 167                 */
 168                seq_printf(m, "0x%p-0x%p   ",
 169                           (void *)st->start_address,
 170                           (void *)st->current_address);
 171
 172                delta = (st->current_address - st->start_address) >> 10;
 173                while (!(delta & 1023) && unit[1]) {
 174                        delta >>= 10;
 175                        unit++;
 176                }
 177                seq_printf(m, "%9lu%c ", delta, *unit);
 178                printk_prot(m, st->current_prot, st->level);
 179
 180                /*
 181                 * We print markers for special areas of address space,
 182                 * such as the start of vmalloc space etc.
 183                 * This helps in the interpretation.
 184                 */
 185                if (st->current_address >= st->marker[1].start_address) {
 186                        st->marker++;
 187                        seq_printf(m, "---[ %s ]---\n", st->marker->name);
 188                }
 189
 190                st->start_address = st->current_address;
 191                st->current_prot = new_prot;
 192                st->level = level;
 193        }
 194}
 195
 196static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 197                                                        unsigned long P)
 198{
 199        int i;
 200        pte_t *start;
 201
 202        start = (pte_t *) pmd_page_vaddr(addr);
 203        for (i = 0; i < PTRS_PER_PTE; i++) {
 204                pgprot_t prot = pte_pgprot(*start);
 205
 206                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
 207                note_page(m, st, prot, 4);
 208                start++;
 209        }
 210}
 211
 212#if PTRS_PER_PMD > 1
 213
 214static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 215                                                        unsigned long P)
 216{
 217        int i;
 218        pmd_t *start;
 219
 220        start = (pmd_t *) pud_page_vaddr(addr);
 221        for (i = 0; i < PTRS_PER_PMD; i++) {
 222                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 223                if (!pmd_none(*start)) {
 224                        pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
 225
 226                        if (pmd_large(*start) || !pmd_present(*start))
 227                                note_page(m, st, __pgprot(prot), 3);
 228                        else
 229                                walk_pte_level(m, st, *start,
 230                                               P + i * PMD_LEVEL_MULT);
 231                } else
 232                        note_page(m, st, __pgprot(0), 3);
 233                start++;
 234        }
 235}
 236
 237#else
 238#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
 239#define pud_large(a) pmd_large(__pmd(pud_val(a)))
 240#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
 241#endif
 242
 243#if PTRS_PER_PUD > 1
 244
 245static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 246                                                        unsigned long P)
 247{
 248        int i;
 249        pud_t *start;
 250
 251        start = (pud_t *) pgd_page_vaddr(addr);
 252
 253        for (i = 0; i < PTRS_PER_PUD; i++) {
 254                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 255                if (!pud_none(*start)) {
 256                        pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
 257
 258                        if (pud_large(*start) || !pud_present(*start))
 259                                note_page(m, st, __pgprot(prot), 2);
 260                        else
 261                                walk_pmd_level(m, st, *start,
 262                                               P + i * PUD_LEVEL_MULT);
 263                } else
 264                        note_page(m, st, __pgprot(0), 2);
 265
 266                start++;
 267        }
 268}
 269
 270#else
 271#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
 272#define pgd_large(a) pud_large(__pud(pgd_val(a)))
 273#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 274#endif
 275
 276static void walk_pgd_level(struct seq_file *m)
 277{
 278#ifdef CONFIG_X86_64
 279        pgd_t *start = (pgd_t *) &init_level4_pgt;
 280#else
 281        pgd_t *start = swapper_pg_dir;
 282#endif
 283        int i;
 284        struct pg_state st;
 285
 286        memset(&st, 0, sizeof(st));
 287
 288        for (i = 0; i < PTRS_PER_PGD; i++) {
 289                st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
 290                if (!pgd_none(*start)) {
 291                        pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
 292
 293                        if (pgd_large(*start) || !pgd_present(*start))
 294                                note_page(m, &st, __pgprot(prot), 1);
 295                        else
 296                                walk_pud_level(m, &st, *start,
 297                                               i * PGD_LEVEL_MULT);
 298                } else
 299                        note_page(m, &st, __pgprot(0), 1);
 300
 301                start++;
 302        }
 303
 304        /* Flush out the last page */
 305        st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
 306        note_page(m, &st, __pgprot(0), 0);
 307}
 308
/*
 * seq_file show callback: produce the complete page-table dump.
 *
 * @m: seq_file receiving the output
 * @v: unused
 *
 * Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
        walk_pgd_level(m);
        return 0;
}
 314
/*
 * open() handler: attach ptdump_show() to the file via single_open(),
 * with no private data.  Returns whatever single_open() returns.
 */
static int ptdump_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, ptdump_show, NULL);
}
 319
/*
 * File operations for the dump file.  No write handler is provided;
 * reading and seeking go through the seq_file helpers, and
 * single_release() pairs with the single_open() done in ptdump_open().
 */
static const struct file_operations ptdump_fops = {
        .open           = ptdump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
 326
 327static int pt_dump_init(void)
 328{
 329        struct dentry *pe;
 330
 331#ifdef CONFIG_X86_32
 332        /* Not a compile-time constant on x86-32 */
 333        address_markers[2].start_address = VMALLOC_START;
 334        address_markers[3].start_address = VMALLOC_END;
 335# ifdef CONFIG_HIGHMEM
 336        address_markers[4].start_address = PKMAP_BASE;
 337        address_markers[5].start_address = FIXADDR_START;
 338# else
 339        address_markers[4].start_address = FIXADDR_START;
 340# endif
 341#endif
 342
 343        pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
 344                                 &ptdump_fops);
 345        if (!pe)
 346                return -ENOMEM;
 347
 348        return 0;
 349}
 350
/* Register pt_dump_init() as an initcall and describe the module. */
__initcall(pt_dump_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
 355