// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

/*
 * HugeTLB Support Matrix
 *
 * ---------------------------------------------------
 * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
 * ---------------------------------------------------
 * |     4K    |   64K    |   2M  |    32M   |   1G  |
 * |    16K    |    2M    |  32M  |     1G   |       |
 * |    64K    |    2M    | 512M  |    16G   |       |
 * ---------------------------------------------------
 */
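/*
 * Each CONT column above is the size of one block entry scaled by the
 * number of entries the contiguous hint spans. For example, with 4K
 * pages the hint covers 16 entries at either level, so a CONT PTE huge
 * page is 16 * 4K = 64K and a CONT PMD huge page is 16 * 2M = 32M.
 */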

/*
 * Reserve CMA areas for the largest supported gigantic
 * huge page when requested. Any other smaller gigantic
 * huge pages could still be served from those areas.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
        int order;

#ifdef CONFIG_ARM64_4K_PAGES
        order = PUD_SHIFT - PAGE_SHIFT;
#else
        order = CONT_PMD_SHIFT - PAGE_SHIFT;
#endif
        /*
         * HugeTLB CMA reservation is required for gigantic
         * huge pages which could not be allocated via the
         * page allocator. Just warn if there is any change
         * breaking this assumption.
         */
        WARN_ON(order <= MAX_ORDER);
        hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */
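
/*
 * For example, with 4K pages PUD_SHIFT is 30, so order = 30 - 12 = 18
 * and the CMA area is sized to serve order-18 (1G) gigantic pages. On
 * 16K and 64K pages the CONT PMD size (1G and 16G respectively, per the
 * matrix above) plays the same role.
 */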

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
        size_t pagesize = huge_page_size(h);

        switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case PMD_SIZE:
        case CONT_PMD_SIZE:
        case CONT_PTE_SIZE:
                return true;
        }
        pr_warn("%s: unrecognized huge page size 0x%lx\n",
                        __func__, pagesize);
        return false;
}
#endif

int pmd_huge(pmd_t pmd)
{
        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
        return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
        return 0;
#endif
}

/*
 * Select all bits except the pfn
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
        unsigned long pfn = pte_pfn(pte);

        return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}
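
/*
 * pfn_pte(pfn, __pgprot(0)) constructs a pte whose only set bits are
 * the PFN field, so XOR-ing it with the original value strips exactly
 * those bits and leaves the attribute bits (valid, contiguous,
 * permission and memory-type bits) behind.
 */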

/*
 * Find how many page-table entries make up this huge mapping and the
 * size each of them covers. The level is inferred by pointer identity:
 * if walking to the pmd entry for @addr lands on @ptep itself, the
 * mapping is a set of CONT_PMDS block entries, otherwise it is a set
 * of CONT_PTES entries at the pte level.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
{
        pgd_t *pgdp = pgd_offset(mm, addr);
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;

        *pgsize = PAGE_SIZE;
        p4dp = p4d_offset(pgdp, addr);
        pudp = pud_offset(p4dp, addr);
        pmdp = pmd_offset(pudp, addr);
        if ((pte_t *)pmdp == ptep) {
                *pgsize = PMD_SIZE;
                return CONT_PMDS;
        }
        return CONT_PTES;
}

static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
        int contig_ptes = 0;

        *pgsize = size;

        switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case PMD_SIZE:
                contig_ptes = 1;
                break;
        case CONT_PMD_SIZE:
                *pgsize = PMD_SIZE;
                contig_ptes = CONT_PMDS;
                break;
        case CONT_PTE_SIZE:
                *pgsize = PAGE_SIZE;
                contig_ptes = CONT_PTES;
                break;
        }

        return contig_ptes;
}
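
/*
 * For example, with 4K pages num_contig_ptes(CONT_PMD_SIZE, &pgsize)
 * sets pgsize to PMD_SIZE (2M) and returns CONT_PMDS (16): a 32M huge
 * page is manipulated as 16 consecutive 2M entries.
 */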

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
                             unsigned long addr,
                             pte_t *ptep,
                             unsigned long pgsize,
                             unsigned long ncontig)
{
        pte_t orig_pte = huge_ptep_get(ptep);
        bool valid = pte_valid(orig_pte);
        unsigned long i, saddr = addr;

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
                pte_t pte = ptep_get_and_clear(mm, addr, ptep);

                /*
                 * If HW_AFDBM is enabled, then the HW could turn on
                 * the dirty or accessed bit for any page in the set,
                 * so check them all.
                 */
                if (pte_dirty(pte))
                        orig_pte = pte_mkdirty(orig_pte);

                if (pte_young(pte))
                        orig_pte = pte_mkyoung(orig_pte);
        }

        if (valid) {
                struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
                flush_tlb_range(&vma, saddr, addr);
        }
        return orig_pte;
}
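
/*
 * A full Break-Before-Make update of a contiguous range thus looks
 * like: (1) clear every entry in the set, (2) invalidate the TLB for
 * the whole range, and (3) only then write the new entries. The "make"
 * half is the set_pte_at() loop in the callers further down.
 */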

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
                             unsigned long addr,
                             pte_t *ptep,
                             unsigned long pgsize,
                             unsigned long ncontig)
{
        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
        unsigned long i, saddr = addr;

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
                pte_clear(mm, addr, ptep);

        flush_tlb_range(&vma, saddr, addr);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                            pte_t *ptep, pte_t pte)
{
        size_t pgsize;
        int i;
        int ncontig;
        unsigned long pfn, dpfn;
        pgprot_t hugeprot;

        /*
         * Code needs to be expanded to handle huge swap and migration
         * entries. Needed for HUGETLB and MEMORY_FAILURE.
         */
        WARN_ON(!pte_present(pte));

        if (!pte_cont(pte)) {
                set_pte_at(mm, addr, ptep, pte);
                return;
        }

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        pfn = pte_pfn(pte);
        dpfn = pgsize >> PAGE_SHIFT;
        hugeprot = pte_pgprot(pte);

        clear_flush(mm, addr, ptep, pgsize, ncontig);

        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
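
/*
 * For a 64K CONT PTE huge page on a 4K kernel this runs with
 * ncontig = 16, pgsize = 4K and dpfn = 1: after the break step it
 * writes ptes for pfn, pfn + 1, ..., pfn + 15, all with the same
 * attributes, including the contiguous bit carried in hugeprot.
 */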

void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
                          pte_t *ptep, pte_t pte, unsigned long sz)
{
        int i, ncontig;
        size_t pgsize;

        ncontig = num_contig_ptes(sz, &pgsize);

        for (i = 0; i < ncontig; i++, ptep++)
                set_pte(ptep, pte);
}

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep = NULL;

        pgdp = pgd_offset(mm, addr);
        p4dp = p4d_offset(pgdp, addr);
        pudp = pud_alloc(mm, p4dp, addr);
        if (!pudp)
                return NULL;

        if (sz == PUD_SIZE) {
                ptep = (pte_t *)pudp;
        } else if (sz == (CONT_PTE_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
                if (!pmdp)
                        return NULL;

                WARN_ON(addr & (sz - 1));
                /*
                 * Note that if this code were ever ported to the
                 * 32-bit arm platform then it will cause trouble in
                 * the case where CONFIG_HIGHPTE is set, since there
                 * will be no pte_unmap() to correspond with this
                 * pte_alloc_map().
                 */
                ptep = pte_alloc_map(mm, pmdp, addr);
        } else if (sz == PMD_SIZE) {
                if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
                        ptep = huge_pmd_share(mm, vma, addr, pudp);
                else
                        ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
        } else if (sz == (CONT_PMD_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
                WARN_ON(addr & (sz - 1));
                return (pte_t *)pmdp;
        }

        return ptep;
}
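
/*
 * The size argument picks the level: PUD_SIZE returns the pud entry
 * itself, PMD_SIZE and CONT_PMD_SIZE allocate down to the pmd (with
 * PMD_SIZE additionally eligible for shared page tables via
 * huge_pmd_share()), and CONT_PTE_SIZE walks down to the pte level,
 * allocating as needed. Any other size falls through and returns NULL.
 */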

pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp, pud;
        pmd_t *pmdp, pmd;

        pgdp = pgd_offset(mm, addr);
        if (!pgd_present(READ_ONCE(*pgdp)))
                return NULL;

        p4dp = p4d_offset(pgdp, addr);
        if (!p4d_present(READ_ONCE(*p4dp)))
                return NULL;

        pudp = pud_offset(p4dp, addr);
        pud = READ_ONCE(*pudp);
        if (sz != PUD_SIZE && pud_none(pud))
                return NULL;
        /* hugepage or swap? */
        if (pud_huge(pud) || !pud_present(pud))
                return (pte_t *)pudp;
        /* table; check the next level */

        if (sz == CONT_PMD_SIZE)
                addr &= CONT_PMD_MASK;

        pmdp = pmd_offset(pudp, addr);
        pmd = READ_ONCE(*pmdp);
        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
            pmd_none(pmd))
                return NULL;
        if (pmd_huge(pmd) || !pmd_present(pmd))
                return (pte_t *)pmdp;

        if (sz == CONT_PTE_SIZE)
                return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

        return NULL;
}

pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
        size_t pagesize = 1UL << shift;

        if (pagesize == CONT_PTE_SIZE) {
                entry = pte_mkcont(entry);
        } else if (pagesize == CONT_PMD_SIZE) {
                entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
        } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
                pr_warn("%s: unrecognized huge page size 0x%lx\n",
                        __func__, pagesize);
        }
        return entry;
}
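
/*
 * For example, with 4K pages a fault on a 64K hstate arrives here with
 * shift == CONT_PTE_SHIFT, so the entry picks up the contiguous bit
 * via pte_mkcont(); plain PMD_SIZE and PUD_SIZE block mappings pass
 * through unchanged.
 */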

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, unsigned long sz)
{
        int i, ncontig;
        size_t pgsize;

        ncontig = num_contig_ptes(sz, &pgsize);

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
                pte_clear(mm, addr, ptep);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr, pte_t *ptep)
{
        int ncontig;
        size_t pgsize;
        pte_t orig_pte = huge_ptep_get(ptep);

        if (!pte_cont(orig_pte))
                return ptep_get_and_clear(mm, addr, ptep);

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);

        return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, a change in write permission only
 * needs to be detected on the first pte of the set. Dirty and young,
 * however, must be compared across all the contiguous ptes, since the
 * hardware may have updated any entry in the set.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
        int i;

        if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
                return 1;

        for (i = 0; i < ncontig; i++) {
                pte_t orig_pte = huge_ptep_get(ptep + i);

                if (pte_dirty(pte) != pte_dirty(orig_pte))
                        return 1;

                if (pte_young(pte) != pte_young(orig_pte))
                        return 1;
        }

        return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
                               unsigned long addr, pte_t *ptep,
                               pte_t pte, int dirty)
{
        int ncontig, i;
        size_t pgsize = 0;
        unsigned long pfn = pte_pfn(pte), dpfn;
        pgprot_t hugeprot;
        pte_t orig_pte;

        if (!pte_cont(pte))
                return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;

        if (!__cont_access_flags_changed(ptep, pte, ncontig))
                return 0;

        orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

        /* Make sure we don't lose the dirty or young state */
        if (pte_dirty(orig_pte))
                pte = pte_mkdirty(pte);

        if (pte_young(orig_pte))
                pte = pte_mkyoung(pte);

        hugeprot = pte_pgprot(pte);
        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

        return 1;
}

void huge_ptep_set_wrprotect(struct mm_struct *mm,
                             unsigned long addr, pte_t *ptep)
{
        unsigned long pfn, dpfn;
        pgprot_t hugeprot;
        int ncontig, i;
        size_t pgsize;
        pte_t pte;

        if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;

        pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
        pte = pte_wrprotect(pte);

        hugeprot = pte_pgprot(pte);
        pfn = pte_pfn(pte);

        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

void huge_ptep_clear_flush(struct vm_area_struct *vma,
                           unsigned long addr, pte_t *ptep)
{
        size_t pgsize;
        int ncontig;

        if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_clear_flush(vma, addr, ptep);
                return;
        }

        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
        clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}

static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
        hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
#endif
        hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
        hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
        hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

        return 0;
}
arch_initcall(hugetlbpage_init);
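
/*
 * With 4K pages the registered hstates are therefore 1G (PUD), 32M
 * (CONT PMD), 2M (PMD) and 64K (CONT PTE), matching the support
 * matrix at the top of this file.
 */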

bool __init arch_hugetlb_valid_size(unsigned long size)
{
        switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case CONT_PMD_SIZE:
        case PMD_SIZE:
        case CONT_PTE_SIZE:
                return true;
        }

        return false;
}
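
/*
 * Generic hugetlb code uses this hook to vet sizes given on the kernel
 * command line: with 4K pages, e.g., hugepagesz=32M is accepted while
 * hugepagesz=256M is rejected at boot.
 */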