linux/include/asm-generic/pgtable.h
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>

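/*
 * ptep_set_access_flags() lets the page fault path make a pte "more
 * permissive" (set the accessed/dirty bits, or restore write access).
 * It returns whether the entry changed; the generic implementation
 * lives in mm/pgtable-generic.c.
 */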
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif
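
/*
 * Usage sketch (illustrative only): page reclaim "ages" a mapped page
 * by clearing its accessed bit under the pte lock:
 *
 *	if (ptep_test_and_clear_young(vma, address, ptep))
 *		referenced++;
 *
 * The vma is passed so architectures can find the mm/TLB state the
 * address belongs to.
 */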

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	/* pmd_clear() takes only the pmd pointer */
	pmd_clear(pmdp);
	return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or during address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define page_test_and_clear_dirty(pfn, mapped)	(0)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define pte_maybe_dirty(pte)		pte_dirty(pte)
#else
#define pte_maybe_dirty(pte)		(1)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(pfn) (0)
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, the rounded-up __boundary may wrap to 0; the
 * "- 1" comparisons below stay correct in that case.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif
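
/*
 * Illustrative walker sketch (not part of this header): a typical
 * caller iterates pgd entries over [addr, end), descending a level
 * for each non-empty entry:
 *
 *	unsigned long next;
 *	pgd_t *pgd = pgd_offset(mm, addr);
 *	do {
 *		next = pgd_addr_end(addr, end);
 *		if (pgd_none_or_clear_bad(pgd))
 *			continue;
 *		walk_pud_range(pgd, addr, next);
 *	} while (pgd++, addr = next, addr != end);
 *
 * walk_pud_range() here is a hypothetical per-level helper;
 * mm/memory.c and mm/pagewalk.c follow this pattern at every level.
 */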

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
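
/*
 * Usage sketch (illustrative): changing the protection of one pte
 * under the pte lock without losing concurrent hardware dirty/young
 * updates, as mm/mprotect.c does:
 *
 *	ptent = ptep_modify_prot_start(mm, addr, pte);
 *	ptent = pte_modify(ptent, newprot);
 *	ptep_modify_prot_commit(mm, addr, pte, ptent);
 */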
#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
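
/*
 * Batching sketch (illustrative), modelled on the pte-moving loop in
 * mm/mremap.c: updates issued between the hooks may be queued and
 * flushed together when the mode is left:
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (; old_addr < old_end; old_pte++, new_pte++,
 *			old_addr += PAGE_SIZE, new_addr += PAGE_SIZE) {
 *		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 *		set_pte_at(mm, new_addr, new_pte, pte);
 *	}
 *	arch_leave_lazy_mmu_mode();
 */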

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entries and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
 * for the physical range indicated by pfn and size.
 */
static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
					unsigned long pfn, unsigned long size)
{
	return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_copy is called when the vma covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size,
 * or for the entire vma (in which case size can be zero).
 */
static inline void untrack_pfn_vma(struct vm_area_struct *vma,
					unsigned long pfn, unsigned long size)
{
}
#else
extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
				unsigned long pfn, unsigned long size);
extern int track_pfn_vma_copy(struct vm_area_struct *vma);
extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
				unsigned long size);
#endif
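
/*
 * Flow sketch (illustrative): remap_pfn_range() calls
 * track_pfn_vma_new() before installing a new pfn mapping, fork's
 * copy_page_range() calls track_pfn_vma_copy() when duplicating a
 * pfnmap vma, and the unmap path calls untrack_pfn_vma().  On x86
 * these hooks maintain PAT memory-type reservations; the generic
 * stubs above make them no-ops everywhere else.
 */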

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
	return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read.  NOTE: this
	 * will only work if pmdval_t is no larger than an unsigned
	 * long.
	 */
	return *pmdp;
}
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd were none is safe (because it
 * can return none anyway).  The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled, because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a non-atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd).  The checks below only care about the
	 * low part of the pmd on 32bit PAE x86 anyway, with the
	 * exception of pmd_none().  So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * also be null, or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	if (pmd_none(pmdval))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		if (!pmd_trans_huge(pmdval))
			pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and want to walk
 * ptes while holding the mmap_sem in read mode (write mode doesn't
 * need this).  If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run pmd_trans_unstable() before walking the ptes after
 * split_huge_page_pmd() returns (because it may have run while the
 * pmd became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}
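
/*
 * Usage sketch (illustrative): a pte walker holding mmap_sem for
 * reading splits any huge pmd, then checks for instability before
 * mapping the pte page, as several mm/ walkers do:
 *
 *	split_huge_page_pmd(mm, pmd);
 *	if (pmd_trans_unstable(pmd))
 *		return 0;	// treat as pmd_none(): skip or retry
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	...
 *	pte_unmap_unlock(pte, ptl);
 */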

#endif /* CONFIG_MMU */

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_GENERIC_PGTABLE_H */