linux/include/asm-generic/pgtable.h
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pte_t *ptep)
{
        pte_t pte = *ptep;
        int r = 1;
        if (!pte_young(pte))
                r = 0;
        else
                set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
        return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        int r = 1;
        if (!pmd_young(pmd))
                r = 0;
        else
                set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
        return r;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pte_t *ptep)
{
        pte_t pte = *ptep;
        pte_clear(mm, address, ptep);
        return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        pmd_clear(pmdp);
        return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pte_t *ptep,
                                            int full)
{
        pte_t pte;
        pte = ptep_get_and_clear(mm, address, ptep);
        return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifying PTEs that are already not present, or
 * during address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
                                              unsigned long address,
                                              pte_t *ptep,
                                              int full)
{
        pte_clear(mm, address, ptep);
}
#endif
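
/*
 * Illustrative sketch (editorial, not part of the original header):
 * how a teardown path might choose between the "full" helpers above.
 * The shape loosely follows zap_pte_range() in mm/memory.c, where
 * "tlb->fullmm" means the whole address space is being destroyed:
 *
 *        if (!pte_present(ptent)) {
 *                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 *                continue;
 *        }
 *        ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 */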

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
        pte_t old_pte = *ptep;
        set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        pmd_t old_pmd = *pmdp;
        set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                            pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
        return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define page_test_and_clear_dirty(pfn, mapped)  (0)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define pte_maybe_dirty(pte)            pte_dirty(pte)
#else
#define pte_maybe_dirty(pte)            (1)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(pfn) (0)
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)       pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr) (pte)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)  (prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, the rounded-up __boundary may wrap to 0 at any
 * of these levels.
 */

#define pgd_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})
#endif
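
/*
 * Worked example (editorial): with 32bit PAE, PMD_SIZE is 0x200000 and
 * PMD_MASK is 0xffe00000.  For addr = 0xffe04000 in the last pmd of the
 * address space, (addr + PMD_SIZE) & PMD_MASK wraps to 0, so a naive
 * "__boundary < end" test would wrongly pick the wrapped 0; comparing
 * "__boundary - 1" against "(end) - 1" treats the wrapped 0 as "one past
 * the top" and correctly returns end.  The usual walker shape is:
 *
 *        do {
 *                next = pmd_addr_end(addr, end);
 *                ...
 *        } while (pmd++, addr = next, addr != end);
 */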

/*
 * When walking page tables, we usually want to skip any p?d_none entries
 * and any p?d_bad entries, reporting the error before resetting the entry
 * to none.  Do the tests inline, but report and clear the bad entry in
 * mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
        if (pgd_none(*pgd))
                return 1;
        if (unlikely(pgd_bad(*pgd))) {
                pgd_clear_bad(pgd);
                return 1;
        }
        return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
        if (pud_none(*pud))
                return 1;
        if (unlikely(pud_bad(*pud))) {
                pud_clear_bad(pud);
                return 1;
        }
        return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
        if (pmd_none(*pmd))
                return 1;
        if (unlikely(pmd_bad(*pmd))) {
                pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}
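
/*
 * Illustrative sketch (editorial): a typical walker skips holes and
 * self-heals corrupted entries with the helpers above; walk_pte_level()
 * is a hypothetical per-range callback, everything else is standard:
 *
 *        pmd = pmd_offset(pud, addr);
 *        do {
 *                next = pmd_addr_end(addr, end);
 *                if (pmd_none_or_clear_bad(pmd))
 *                        continue;
 *                walk_pte_level(pmd, addr, next);
 *        } while (pmd++, addr = next, addr != end);
 */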

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep)
{
        /*
         * Get the current pte state, but zero it out to make it
         * non-present, preventing the hardware from asynchronously
         * updating it.
         */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep, pte_t pte)
{
        /*
         * The pte is non-present, so there's no hardware state to
         * preserve.
         */
        set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep)
{
        return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep, pte_t pte)
{
        __ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
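
/*
 * Illustrative sketch (editorial): the intended calling pattern, with
 * the pte lock already held (e.g. taken via pte_offset_map_lock());
 * this loosely mirrors change_pte_range() in mm/mprotect.c:
 *
 *        ptent = ptep_modify_prot_start(mm, addr, pte);
 *        ptent = pte_modify(ptent, newprot);
 *        ptep_modify_prot_commit(mm, addr, pte, ptent);
 */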
#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could arise in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()      do {} while (0)
#define arch_leave_lazy_mmu_mode()      do {} while (0)
#define arch_flush_lazy_mmu_mode()      do {} while (0)
#endif
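
/*
 * Illustrative sketch (editorial): batching pte installs over one range,
 * done under the page table lock as the comment above requires; this
 * loosely follows remap_pte_range() in mm/memory.c:
 *
 *        arch_enter_lazy_mmu_mode();
 *        do {
 *                set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 *                pfn++;
 *        } while (pte++, addr += PAGE_SIZE, addr != end);
 *        arch_leave_lazy_mmu_mode();
 *
 * Per the read hazard warning above, reading back through the raw pte
 * pointer inside the lazy section is not guaranteed to see the queued
 * updates.
 */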

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entries and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev) do {} while (0)
#endif
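
/*
 * Illustrative sketch (editorial, heavily abridged): the caller is the
 * scheduler's context_switch(), roughly:
 *
 *        arch_start_context_switch(prev);
 *        switch_mm(oldmm, mm, next);
 *        switch_to(prev, next, prev);
 *
 * The matching "end" is issued from architecture-specific code after the
 * switch, which is why no generic arch_end_context_switch() appears here.
 */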

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                                  unsigned long pfn, unsigned long addr,
                                  unsigned long size)
{
        return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
                                   unsigned long pfn)
{
        return 0;
}

/*
 * track_pfn_copy is called when a vma covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
        return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.  It can
 * be called for a specific region indicated by pfn and size, or for the
 * entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
                               unsigned long pfn, unsigned long size)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                           unsigned long pfn, unsigned long addr,
                           unsigned long size);
extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
                            unsigned long pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
                        unsigned long size);
#endif
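
/*
 * Illustrative sketch (editorial): how a remap path is expected to pair
 * these calls, loosely following remap_pfn_range() in mm/memory.c:
 *
 *        err = track_pfn_remap(vma, &prot, pfn, addr, size);
 *        if (err)
 *                return err;
 *        err = ... install the ptes ...;
 *        if (err)
 *                untrack_pfn(vma, pfn, size);
 *
 * At unmap time, untrack_pfn(vma, 0, 0) drops the tracking for the
 * entire vma, matching the "pfn and size are zero" case described above.
 */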

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
        return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
        return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
        BUG();
        return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
        /*
         * Depend on the compiler for an atomic pmd read.  NOTE: this is
         * only going to work if pmdval_t isn't larger than an
         * unsigned long.
         */
        return *pmdp;
}
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd were none is safe (because it
 * can return none anyway).  The compiler-level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled, because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
        pmd_t pmdval = pmd_read_atomic(pmd);
        /*
         * The barrier will stabilize the pmdval in a register or on
         * the stack so that it will stop changing under the code.
         *
         * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
         * pmd_read_atomic is allowed to return a non-atomic pmdval
         * (for example pointing to a hugepage that has never been
         * mapped in the pmd).  The checks below only care about the
         * low part of the pmd with 32bit PAE x86 anyway, with the
         * exception of pmd_none().  So the important thing is that if
         * the low part of the pmd is found null, the high part will
         * also be null, or the pmd_none() check below would be
         * confused.
         */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        barrier();
#endif
        if (pmd_none(pmdval))
                return 1;
        if (unlikely(pmd_bad(pmdval))) {
                if (!pmd_trans_huge(pmdval))
                        pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}
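
/*
 * Illustrative sketch (editorial): a pte walker that only holds the
 * mmap_sem for reading guards its pte_offset_map_lock() with this
 * helper and treats an unstable pmd as a hole:
 *
 *        if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 *                return 0;
 *        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *        ... walk the ptes ...
 *        pte_unmap_unlock(pte, ptl);
 */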

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that have already verified the pmd is not none and that want
 * to walk ptes while holding the mmap_sem in read mode (write mode
 * doesn't need this).  If THP is not enabled, the pmd can't go away
 * under the code even if MADV_DONTNEED runs, but if THP is enabled we
 * need to run pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run while the pmd
 * was null, after which a page fault can map in a THP rather than a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
        return 0;
#endif
}
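
/*
 * Illustrative sketch (editorial): the pattern described above, with
 * the split_huge_page_pmd() arguments elided since its exact signature
 * is release-specific:
 *
 *        split_huge_page_pmd(...);
 *        if (pmd_trans_unstable(pmd))
 *                return 0;
 *        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *        ...
 */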

#endif /* CONFIG_MMU */

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_GENERIC_PGTABLE_H */