linux/include/asm-i386/pgtable.h
<<
>>
Prefs
   1#ifndef _I386_PGTABLE_H
   2#define _I386_PGTABLE_H
   3
   4#include <linux/config.h>
   5
   6/*
   7 * The Linux memory management assumes a three-level page table setup. On
   8 * the i386, we use that, but "fold" the mid level into the top-level page
   9 * table, so that we physically have the same two-level page table as the
  10 * i386 mmu expects.
  11 *
  12 * This file contains the functions and defines necessary to modify and use
  13 * the i386 page table tree.
  14 */
  15#ifndef __ASSEMBLY__
  16#include <asm/processor.h>
  17#include <asm/fixmap.h>
  18#include <linux/threads.h>
  19
  20#ifndef _I386_BITOPS_H
  21#include <asm/bitops.h>
  22#endif
  23
  24#include <linux/slab.h>
  25#include <linux/list.h>
  26#include <linux/spinlock.h>
  27
  28/*
  29 * ZERO_PAGE is a global shared page that is always zero: used
  30 * for zero-mapped memory areas etc..
  31 */
  32#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
  33extern unsigned long empty_zero_page[1024];
  34extern pgd_t swapper_pg_dir[1024];
  35extern kmem_cache_t *pgd_cache;
  36extern kmem_cache_t *pmd_cache;
  37extern spinlock_t pgd_lock;
  38extern struct page *pgd_list;
  39
  40void pmd_ctor(void *, kmem_cache_t *, unsigned long);
  41void pgd_ctor(void *, kmem_cache_t *, unsigned long);
  42void pgd_dtor(void *, kmem_cache_t *, unsigned long);
  43void pgtable_cache_init(void);
  44void paging_init(void);
  45
  46/*
  47 * The Linux x86 paging architecture is 'compile-time dual-mode', it
  48 * implements both the traditional 2-level x86 page tables and the
  49 * newer 3-level PAE-mode page tables.
  50 */
  51#ifdef CONFIG_X86_PAE
  52# include <asm/pgtable-3level-defs.h>
  53# define PMD_SIZE       (1UL << PMD_SHIFT)
  54# define PMD_MASK       (~(PMD_SIZE-1))
  55#else
  56# include <asm/pgtable-2level-defs.h>
  57#endif
  58
  59#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
  60#define PGDIR_MASK      (~(PGDIR_SIZE-1))
  61
  62#define USER_PTRS_PER_PGD       (TASK_SIZE/PGDIR_SIZE)
  63#define FIRST_USER_PGD_NR       0
  64
  65#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
  66#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
  67
  68#define TWOLEVEL_PGDIR_SHIFT    22
  69#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
  70#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
  71
  72/* Just any arbitrary offset to the start of the vmalloc VM area: the
  73 * current 8MB value just means that there will be a 8MB "hole" after the
  74 * physical memory until the kernel virtual memory starts.  That means that
  75 * any out-of-bounds memory accesses will hopefully be caught.
  76 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
  77 * area for the same reason. ;)
  78 */
  79#define VMALLOC_OFFSET  (8*1024*1024)
  80#define VMALLOC_START   (((unsigned long) high_memory + vmalloc_earlyreserve + \
  81                        2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
  82#ifdef CONFIG_HIGHMEM
  83# define VMALLOC_END    (PKMAP_BASE-2*PAGE_SIZE)
  84#else
  85# define VMALLOC_END    (FIXADDR_START-2*PAGE_SIZE)
  86#endif
  87
  88/*
  89 * The 4MB page is guessing..  Detailed in the infamous "Chapter H"
  90 * of the Pentium details, but assuming intel did the straightforward
  91 * thing, this bit set in the page directory entry just means that
  92 * the page directory entry points directly to a 4MB-aligned block of
  93 * memory. 
  94 */
  95#define _PAGE_BIT_PRESENT       0
  96#define _PAGE_BIT_RW            1
  97#define _PAGE_BIT_USER          2
  98#define _PAGE_BIT_PWT           3
  99#define _PAGE_BIT_PCD           4
 100#define _PAGE_BIT_ACCESSED      5
 101#define _PAGE_BIT_DIRTY         6
 102#define _PAGE_BIT_PSE           7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
 103#define _PAGE_BIT_GLOBAL        8       /* Global TLB entry PPro+ */
 104#define _PAGE_BIT_UNUSED1       9       /* available for programmer */
 105#define _PAGE_BIT_UNUSED2       10
 106#define _PAGE_BIT_UNUSED3       11
 107#define _PAGE_BIT_NX            63
 108
 109#define _PAGE_PRESENT   0x001
 110#define _PAGE_RW        0x002
 111#define _PAGE_USER      0x004
 112#define _PAGE_PWT       0x008
 113#define _PAGE_PCD       0x010
 114#define _PAGE_ACCESSED  0x020
 115#define _PAGE_DIRTY     0x040
 116#define _PAGE_PSE       0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
 117#define _PAGE_GLOBAL    0x100   /* Global TLB entry PPro+ */
 118#define _PAGE_UNUSED1   0x200   /* available for programmer */
 119#define _PAGE_UNUSED2   0x400
 120#define _PAGE_UNUSED3   0x800
 121
 122#define _PAGE_FILE      0x040   /* set:pagecache unset:swap */
 123#define _PAGE_PROTNONE  0x080   /* If not present */
 124#ifdef CONFIG_X86_PAE
 125#define _PAGE_NX        (1ULL<<_PAGE_BIT_NX)
 126#else
 127#define _PAGE_NX        0
 128#endif
 129
 130#define _PAGE_TABLE     (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 131#define _KERNPG_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 132#define _PAGE_CHG_MASK  (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 133
 134#define PAGE_NONE \
 135        __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 136#define PAGE_SHARED \
 137        __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 138
 139#define PAGE_SHARED_EXEC \
 140        __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 141#define PAGE_COPY_NOEXEC \
 142        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
 143#define PAGE_COPY_EXEC \
 144        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 145#define PAGE_COPY \
 146        PAGE_COPY_NOEXEC
 147#define PAGE_READONLY \
 148        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
 149#define PAGE_READONLY_EXEC \
 150        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 151
 152#define _PAGE_KERNEL \
 153        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
 154#define _PAGE_KERNEL_EXEC \
 155        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 156
 157extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 158#define __PAGE_KERNEL_RO                (__PAGE_KERNEL & ~_PAGE_RW)
 159#define __PAGE_KERNEL_NOCACHE           (__PAGE_KERNEL | _PAGE_PCD)
 160#define __PAGE_KERNEL_LARGE             (__PAGE_KERNEL | _PAGE_PSE)
 161#define __PAGE_KERNEL_LARGE_EXEC        (__PAGE_KERNEL_EXEC | _PAGE_PSE)
 162
 163#define PAGE_KERNEL             __pgprot(__PAGE_KERNEL)
 164#define PAGE_KERNEL_RO          __pgprot(__PAGE_KERNEL_RO)
 165#define PAGE_KERNEL_EXEC        __pgprot(__PAGE_KERNEL_EXEC)
 166#define PAGE_KERNEL_NOCACHE     __pgprot(__PAGE_KERNEL_NOCACHE)
 167#define PAGE_KERNEL_LARGE       __pgprot(__PAGE_KERNEL_LARGE)
 168#define PAGE_KERNEL_LARGE_EXEC  __pgprot(__PAGE_KERNEL_LARGE_EXEC)
 169
 170/*
 171 * The i386 can't do page protection for execute, and considers that
 172 * the same are read. Also, write permissions imply read permissions.
 173 * This is the closest we can get..
 174 */
 175#define __P000  PAGE_NONE
 176#define __P001  PAGE_READONLY
 177#define __P010  PAGE_COPY
 178#define __P011  PAGE_COPY
 179#define __P100  PAGE_READONLY_EXEC
 180#define __P101  PAGE_READONLY_EXEC
 181#define __P110  PAGE_COPY_EXEC
 182#define __P111  PAGE_COPY_EXEC
 183
 184#define __S000  PAGE_NONE
 185#define __S001  PAGE_READONLY
 186#define __S010  PAGE_SHARED
 187#define __S011  PAGE_SHARED
 188#define __S100  PAGE_READONLY_EXEC
 189#define __S101  PAGE_READONLY_EXEC
 190#define __S110  PAGE_SHARED_EXEC
 191#define __S111  PAGE_SHARED_EXEC
 192
 193/*
 194 * Define this if things work differently on an i386 and an i486:
 195 * it will (on an i486) warn about kernel memory accesses that are
 196 * done without a 'verify_area(VERIFY_WRITE,..)'
 197 */
 198#undef TEST_VERIFY_AREA
 199
 200/* The boot page tables (all created as a single array) */
 201extern unsigned long pg0[];
 202
 203#define pte_present(x)  ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
 204#define pte_clear(xp)   do { set_pte(xp, __pte(0)); } while (0)
 205
 206#define pmd_none(x)     (!pmd_val(x))
 207#define pmd_present(x)  (pmd_val(x) & _PAGE_PRESENT)
 208#define pmd_clear(xp)   do { set_pmd(xp, __pmd(0)); } while (0)
 209#define pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 210
 211
 212#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 213
 214/*
 215 * The following only work if pte_present() is true.
 216 * Undefined behaviour if not..
 217 */
 218static inline int pte_user(pte_t pte)           { return (pte).pte_low & _PAGE_USER; }
 219static inline int pte_read(pte_t pte)           { return (pte).pte_low & _PAGE_USER; }
 220static inline int pte_dirty(pte_t pte)          { return (pte).pte_low & _PAGE_DIRTY; }
 221static inline int pte_young(pte_t pte)          { return (pte).pte_low & _PAGE_ACCESSED; }
 222static inline int pte_write(pte_t pte)          { return (pte).pte_low & _PAGE_RW; }
 223
 224/*
 225 * The following only works if pte_present() is not true.
 226 */
 227static inline int pte_file(pte_t pte)           { return (pte).pte_low & _PAGE_FILE; }
 228
 229static inline pte_t pte_rdprotect(pte_t pte)    { (pte).pte_low &= ~_PAGE_USER; return pte; }
 230static inline pte_t pte_exprotect(pte_t pte)    { (pte).pte_low &= ~_PAGE_USER; return pte; }
 231static inline pte_t pte_mkclean(pte_t pte)      { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
 232static inline pte_t pte_mkold(pte_t pte)        { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
 233static inline pte_t pte_wrprotect(pte_t pte)    { (pte).pte_low &= ~_PAGE_RW; return pte; }
 234static inline pte_t pte_mkread(pte_t pte)       { (pte).pte_low |= _PAGE_USER; return pte; }
 235static inline pte_t pte_mkexec(pte_t pte)       { (pte).pte_low |= _PAGE_USER; return pte; }
 236static inline pte_t pte_mkdirty(pte_t pte)      { (pte).pte_low |= _PAGE_DIRTY; return pte; }
 237static inline pte_t pte_mkyoung(pte_t pte)      { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 238static inline pte_t pte_mkwrite(pte_t pte)      { (pte).pte_low |= _PAGE_RW; return pte; }
 239
 240#ifdef CONFIG_X86_PAE
 241# include <asm/pgtable-3level.h>
 242#else
 243# include <asm/pgtable-2level.h>
 244#endif
 245
 246static inline int ptep_test_and_clear_dirty(pte_t *ptep)
 247{
 248        if (!pte_dirty(*ptep))
 249                return 0;
 250        return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
 251}
 252
 253static inline int ptep_test_and_clear_young(pte_t *ptep)
 254{
 255        if (!pte_young(*ptep))
 256                return 0;
 257        return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
 258}
 259
 260static inline void ptep_set_wrprotect(pte_t *ptep)              { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); }
 261static inline void ptep_mkdirty(pte_t *ptep)                    { set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
 262
 263/*
 264 * Macro to mark a page protection value as "uncacheable".  On processors which do not support
 265 * it, this is a no-op.
 266 */
 267#define pgprot_noncached(prot)  ((boot_cpu_data.x86 > 3)                                          \
 268                                 ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
 269
 270/*
 271 * Conversion functions: convert a page and protection to a page entry,
 272 * and a page entry and page directory to the page they refer to.
 273 */
 274
 275#define mk_pte(page, pgprot)    pfn_pte(page_to_pfn(page), (pgprot))
 276#define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
 277
 278static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 279{
 280        pte.pte_low &= _PAGE_CHG_MASK;
 281        pte.pte_low |= pgprot_val(newprot);
 282#ifdef CONFIG_X86_PAE
 283        /*
 284         * Chop off the NX bit (if present), and add the NX portion of
 285         * the newprot (if present):
 286         */
 287        pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
 288        pte.pte_high |= (pgprot_val(newprot) >> 32) & \
 289                                        (__supported_pte_mask >> 32);
 290#endif
 291        return pte;
 292}
 293
 294#define page_pte(page) page_pte_prot(page, __pgprot(0))
 295
 296#define pmd_large(pmd) \
 297((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
 298
 299/*
 300 * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
 301 *
 302 * this macro returns the index of the entry in the pgd page which would
 303 * control the given virtual address
 304 */
 305#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
 306#define pgd_index_k(addr) pgd_index(addr)
 307
 308/*
 309 * pgd_offset() returns a (pgd_t *)
 310 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
 311 */
 312#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
 313
 314/*
 315 * a shortcut which implies the use of the kernel's pgd, instead
 316 * of a process's
 317 */
 318#define pgd_offset_k(address) pgd_offset(&init_mm, address)
 319
 320/*
 321 * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
 322 *
 323 * this macro returns the index of the entry in the pmd page which would
 324 * control the given virtual address
 325 */
 326#define pmd_index(address) \
 327                (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 328
 329/*
 330 * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
 331 *
 332 * this macro returns the index of the entry in the pte page which would
 333 * control the given virtual address
 334 */
 335#define pte_index(address) \
 336                (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 337#define pte_offset_kernel(dir, address) \
 338        ((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
 339
 340/*
 341 * Helper function that returns the kernel pagetable entry controlling
 342 * the virtual address 'address'. NULL means no pagetable entry present.
 343 * NOTE: the return type is pte_t but if the pmd is PSE then we return it
 344 * as a pte too.
 345 */
 346extern pte_t *lookup_address(unsigned long address);
 347
 348/*
 349 * Make a given kernel text page executable/non-executable.
 350 * Returns the previous executability setting of that page (which
 351 * is used to restore the previous state). Used by the SMP bootup code.
 352 * NOTE: this is an __init function for security reasons.
 353 */
 354#ifdef CONFIG_X86_PAE
 355 extern int set_kernel_exec(unsigned long vaddr, int enable);
 356#else
 357 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
 358#endif
 359
 360extern void noexec_setup(const char *str);
 361
 362#if defined(CONFIG_HIGHPTE)
 363#define pte_offset_map(dir, address) \
 364        ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
 365#define pte_offset_map_nested(dir, address) \
 366        ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
 367#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
 368#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
 369#else
 370#define pte_offset_map(dir, address) \
 371        ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
 372#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
 373#define pte_unmap(pte) do { } while (0)
 374#define pte_unmap_nested(pte) do { } while (0)
 375#endif
 376
 377/*
 378 * The i386 doesn't have any external MMU info: the kernel page
 379 * tables contain all the necessary information.
 380 *
 381 * Also, we only update the dirty/accessed state if we set
 382 * the dirty bit by hand in the kernel, since the hardware
 383 * will do the accessed bit for us, and we don't want to
 384 * race with other CPU's that might be updating the dirty
 385 * bit at the same time.
 386 */
 387#define update_mmu_cache(vma,address,pte) do { } while (0)
 388#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 389#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 390        do {                                                              \
 391                if (__dirty) {                                            \
 392                        (__ptep)->pte_low = (__entry).pte_low;            \
 393                        flush_tlb_page(__vma, __address);                 \
 394                }                                                         \
 395        } while (0)
 396
 397#endif /* !__ASSEMBLY__ */
 398
 399#ifndef CONFIG_DISCONTIGMEM
 400#define kern_addr_valid(addr)   (1)
 401#endif /* !CONFIG_DISCONTIGMEM */
 402
 403#define io_remap_page_range(vma, vaddr, paddr, size, prot)              \
 404                remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
 405
 406#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 407#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 408#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 409#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 410#define __HAVE_ARCH_PTEP_MKDIRTY
 411#define __HAVE_ARCH_PTE_SAME
 412#include <asm-generic/pgtable.h>
 413
 414#endif /* _I386_PGTABLE_H */
 415
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.