1
2
3
4
5
6
7
8
9
10#include <linux/init.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/hugetlb.h>
14#include <linux/pagemap.h>
15#include <linux/slab.h>
16#include <linux/err.h>
17#include <linux/sysctl.h>
18#include <asm/mman.h>
19#include <asm/pgalloc.h>
20#include <asm/tlb.h>
21#include <asm/tlbflush.h>
22#include <asm/mmu_context.h>
23#include <asm/machdep.h>
24#include <asm/cputable.h>
25#include <asm/spu.h>
26
/* log2 of the huge page sizes handled in this file: 64KB, 16MB and 16GB. */
#define PAGE_SHIFT_64K 16
#define PAGE_SHIFT_16M 24
#define PAGE_SHIFT_16G 34

/* Number of (1 << SID_SHIFT)-sized areas in the first 4GB (0x100000000). */
#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
/* Number of (1 << HTLB_AREA_SHIFT)-sized areas in PGTABLE_RANGE. */
#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
/* Number of slots in gpage_freearray[]. */
#define MAX_NUMBER_GPAGES 1024
34
35
36
/*
 * Stack of page addresses recorded by add_gpage() and popped by
 * alloc_bootmem_huge_page(). nr_gpages is the number of valid entries;
 * popped slots are reset to 0.
 */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
39
40
41
42
/*
 * One entry per MMU page size. A zero entry is tested throughout this
 * file to mean "not usable as a huge page size". A non-zero entry is the
 * hugepte shift computed by set_huge_psize(), i.e. log2 of the number of
 * ptes held in one hugepte table for that size.
 */
unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { };

/* Alias used where the array is read as a shift count. */
#define hugepte_shift mmu_huge_psizes
/* Number of entries in, and byte size of, one hugepte table for psize. */
#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize])
#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize])

/* Address range covered by one hugepte table: its shift, size and mask. */
#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \
	+ hugepte_shift[psize])
#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize))
#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1))
53
54
55
56#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1)
57#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
58
59static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
60 "unused_4K", "hugepte_cache_64K", "unused_64K_AP",
61 "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
62};
63
64
65
66
/*
 * Flag ORed into hugepd_t.pd when a hugepte table is installed
 * (__hugepte_alloc()) and required/stripped again by hugepd_page().
 */
#define HUGEPD_OK 0x1

/* Directory entry holding the address of a hugepte table plus HUGEPD_OK. */
typedef struct { unsigned long pd; } hugepd_t;

/* True when the entry is completely empty. */
#define hugepd_none(hpd) ((hpd).pd == 0)
72
73static inline int shift_to_mmu_psize(unsigned int shift)
74{
75 switch (shift) {
76#ifndef CONFIG_PPC_64K_PAGES
77 case PAGE_SHIFT_64K:
78 return MMU_PAGE_64K;
79#endif
80 case PAGE_SHIFT_16M:
81 return MMU_PAGE_16M;
82 case PAGE_SHIFT_16G:
83 return MMU_PAGE_16G;
84 }
85 return -1;
86}
87
88static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
89{
90 if (mmu_psize_defs[mmu_psize].shift)
91 return mmu_psize_defs[mmu_psize].shift;
92 BUG();
93}
94
95static inline pte_t *hugepd_page(hugepd_t hpd)
96{
97 BUG_ON(!(hpd.pd & HUGEPD_OK));
98 return (pte_t *)(hpd.pd & ~HUGEPD_OK);
99}
100
101static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
102 struct hstate *hstate)
103{
104 unsigned int shift = huge_page_shift(hstate);
105 int psize = shift_to_mmu_psize(shift);
106 unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
107 pte_t *dir = hugepd_page(*hpdp);
108
109 return dir + idx;
110}
111
112static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
113 unsigned long address, unsigned int psize)
114{
115 pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
116 GFP_KERNEL|__GFP_REPEAT);
117
118 if (! new)
119 return -ENOMEM;
120
121 spin_lock(&mm->page_table_lock);
122 if (!hugepd_none(*hpdp))
123 kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
124 else
125 hpdp->pd = (unsigned long)new | HUGEPD_OK;
126 spin_unlock(&mm->page_table_lock);
127 return 0;
128}
129
130
131static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate)
132{
133 if (huge_page_shift(hstate) < PUD_SHIFT)
134 return pud_offset(pgd, addr);
135 else
136 return (pud_t *) pgd;
137}
138static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr,
139 struct hstate *hstate)
140{
141 if (huge_page_shift(hstate) < PUD_SHIFT)
142 return pud_alloc(mm, pgd, addr);
143 else
144 return (pud_t *) pgd;
145}
146static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
147{
148 if (huge_page_shift(hstate) < PMD_SHIFT)
149 return pmd_offset(pud, addr);
150 else
151 return (pmd_t *) pud;
152}
153static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
154 struct hstate *hstate)
155{
156 if (huge_page_shift(hstate) < PMD_SHIFT)
157 return pmd_alloc(mm, pud, addr);
158 else
159 return (pmd_t *) pud;
160}
161
162
163
164
165void add_gpage(unsigned long addr, unsigned long page_size,
166 unsigned long number_of_pages)
167{
168 if (!addr)
169 return;
170 while (number_of_pages > 0) {
171 gpage_freearray[nr_gpages] = addr;
172 nr_gpages++;
173 number_of_pages--;
174 addr += page_size;
175 }
176}
177
178
179
180
181int alloc_bootmem_huge_page(struct hstate *hstate)
182{
183 struct huge_bootmem_page *m;
184 if (nr_gpages == 0)
185 return 0;
186 m = phys_to_virt(gpage_freearray[--nr_gpages]);
187 gpage_freearray[nr_gpages] = 0;
188 list_add(&m->list, &huge_boot_pages);
189 m->hstate = hstate;
190 return 1;
191}
192
193
194
195pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
196{
197 pgd_t *pg;
198 pud_t *pu;
199 pmd_t *pm;
200
201 unsigned int psize;
202 unsigned int shift;
203 unsigned long sz;
204 struct hstate *hstate;
205 psize = get_slice_psize(mm, addr);
206 shift = mmu_psize_to_shift(psize);
207 sz = ((1UL) << shift);
208 hstate = size_to_hstate(sz);
209
210 addr &= hstate->mask;
211
212 pg = pgd_offset(mm, addr);
213 if (!pgd_none(*pg)) {
214 pu = hpud_offset(pg, addr, hstate);
215 if (!pud_none(*pu)) {
216 pm = hpmd_offset(pu, addr, hstate);
217 if (!pmd_none(*pm))
218 return hugepte_offset((hugepd_t *)pm, addr,
219 hstate);
220 }
221 }
222
223 return NULL;
224}
225
226pte_t *huge_pte_alloc(struct mm_struct *mm,
227 unsigned long addr, unsigned long sz)
228{
229 pgd_t *pg;
230 pud_t *pu;
231 pmd_t *pm;
232 hugepd_t *hpdp = NULL;
233 struct hstate *hstate;
234 unsigned int psize;
235 hstate = size_to_hstate(sz);
236
237 psize = get_slice_psize(mm, addr);
238 BUG_ON(!mmu_huge_psizes[psize]);
239
240 addr &= hstate->mask;
241
242 pg = pgd_offset(mm, addr);
243 pu = hpud_alloc(mm, pg, addr, hstate);
244
245 if (pu) {
246 pm = hpmd_alloc(mm, pu, addr, hstate);
247 if (pm)
248 hpdp = (hugepd_t *)pm;
249 }
250
251 if (! hpdp)
252 return NULL;
253
254 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
255 return NULL;
256
257 return hugepte_offset(hpdp, addr, hstate);
258}
259
260int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
261{
262 return 0;
263}
264
265static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
266 unsigned int psize)
267{
268 pte_t *hugepte = hugepd_page(*hpdp);
269
270 hpdp->pd = 0;
271 tlb->need_flush = 1;
272 pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
273 HUGEPTE_CACHE_NUM+psize-1,
274 PGF_CACHENUM_MASK));
275}
276
277static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
278 unsigned long addr, unsigned long end,
279 unsigned long floor, unsigned long ceiling,
280 unsigned int psize)
281{
282 pmd_t *pmd;
283 unsigned long next;
284 unsigned long start;
285
286 start = addr;
287 pmd = pmd_offset(pud, addr);
288 do {
289 next = pmd_addr_end(addr, end);
290 if (pmd_none(*pmd))
291 continue;
292 free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
293 } while (pmd++, addr = next, addr != end);
294
295 start &= PUD_MASK;
296 if (start < floor)
297 return;
298 if (ceiling) {
299 ceiling &= PUD_MASK;
300 if (!ceiling)
301 return;
302 }
303 if (end - 1 > ceiling - 1)
304 return;
305
306 pmd = pmd_offset(pud, start);
307 pud_clear(pud);
308 pmd_free_tlb(tlb, pmd);
309}
310
311static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
312 unsigned long addr, unsigned long end,
313 unsigned long floor, unsigned long ceiling)
314{
315 pud_t *pud;
316 unsigned long next;
317 unsigned long start;
318 unsigned int shift;
319 unsigned int psize = get_slice_psize(tlb->mm, addr);
320 shift = mmu_psize_to_shift(psize);
321
322 start = addr;
323 pud = pud_offset(pgd, addr);
324 do {
325 next = pud_addr_end(addr, end);
326 if (shift < PMD_SHIFT) {
327 if (pud_none_or_clear_bad(pud))
328 continue;
329 hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
330 ceiling, psize);
331 } else {
332 if (pud_none(*pud))
333 continue;
334 free_hugepte_range(tlb, (hugepd_t *)pud, psize);
335 }
336 } while (pud++, addr = next, addr != end);
337
338 start &= PGDIR_MASK;
339 if (start < floor)
340 return;
341 if (ceiling) {
342 ceiling &= PGDIR_MASK;
343 if (!ceiling)
344 return;
345 }
346 if (end - 1 > ceiling - 1)
347 return;
348
349 pud = pud_offset(pgd, start);
350 pgd_clear(pgd);
351 pud_free_tlb(tlb, pud);
352}
353
354
355
356
357
358
359void hugetlb_free_pgd_range(struct mmu_gather *tlb,
360 unsigned long addr, unsigned long end,
361 unsigned long floor, unsigned long ceiling)
362{
363 pgd_t *pgd;
364 unsigned long next;
365 unsigned long start;
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401 unsigned int psize = get_slice_psize(tlb->mm, addr);
402
403 addr &= HUGEPD_MASK(psize);
404 if (addr < floor) {
405 addr += HUGEPD_SIZE(psize);
406 if (!addr)
407 return;
408 }
409 if (ceiling) {
410 ceiling &= HUGEPD_MASK(psize);
411 if (!ceiling)
412 return;
413 }
414 if (end - 1 > ceiling - 1)
415 end -= HUGEPD_SIZE(psize);
416 if (addr > end - 1)
417 return;
418
419 start = addr;
420 pgd = pgd_offset(tlb->mm, addr);
421 do {
422 psize = get_slice_psize(tlb->mm, addr);
423 BUG_ON(!mmu_huge_psizes[psize]);
424 next = pgd_addr_end(addr, end);
425 if (mmu_psize_to_shift(psize) < PUD_SHIFT) {
426 if (pgd_none_or_clear_bad(pgd))
427 continue;
428 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
429 } else {
430 if (pgd_none(*pgd))
431 continue;
432 free_hugepte_range(tlb, (hugepd_t *)pgd, psize);
433 }
434 } while (pgd++, addr = next, addr != end);
435}
436
437void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
438 pte_t *ptep, pte_t pte)
439{
440 if (pte_present(*ptep)) {
441
442
443
444
445
446 unsigned int psize = get_slice_psize(mm, addr);
447 unsigned int shift = mmu_psize_to_shift(psize);
448 unsigned long sz = ((1UL) << shift);
449 struct hstate *hstate = size_to_hstate(sz);
450 pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
451 }
452 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
453}
454
455pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
456 pte_t *ptep)
457{
458 unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
459 return __pte(old);
460}
461
462struct page *
463follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
464{
465 pte_t *ptep;
466 struct page *page;
467 unsigned int mmu_psize = get_slice_psize(mm, address);
468
469
470 if (!mmu_huge_psizes[mmu_psize])
471 return ERR_PTR(-EINVAL);
472
473 ptep = huge_pte_offset(mm, address);
474 page = pte_page(*ptep);
475 if (page) {
476 unsigned int shift = mmu_psize_to_shift(mmu_psize);
477 unsigned long sz = ((1UL) << shift);
478 page += (address % sz) / PAGE_SIZE;
479 }
480
481 return page;
482}
483
484int pmd_huge(pmd_t pmd)
485{
486 return 0;
487}
488
489int pud_huge(pud_t pud)
490{
491 return 0;
492}
493
494struct page *
495follow_huge_pmd(struct mm_struct *mm, unsigned long address,
496 pmd_t *pmd, int write)
497{
498 BUG();
499 return NULL;
500}
501
502
503unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
504 unsigned long len, unsigned long pgoff,
505 unsigned long flags)
506{
507 struct hstate *hstate = hstate_file(file);
508 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
509
510 if (!mmu_huge_psizes[mmu_psize])
511 return -EINVAL;
512 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
513}
514
515unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
516{
517 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
518
519 return 1UL << mmu_psize_to_shift(psize);
520}
521
522
523
524
525static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
526 pte_t pte, int trap, unsigned long sz)
527{
528 struct page *page;
529 int i;
530
531 if (!pfn_valid(pte_pfn(pte)))
532 return rflags;
533
534 page = pte_page(pte);
535
536
537 if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
538 if (trap == 0x400) {
539 for (i = 0; i < (sz / PAGE_SIZE); i++)
540 __flush_dcache_icache(page_address(page+i));
541 set_bit(PG_arch_1, &page->flags);
542 } else {
543 rflags |= HPTE_R_N;
544 }
545 }
546 return rflags;
547}
548
549int hash_huge_page(struct mm_struct *mm, unsigned long access,
550 unsigned long ea, unsigned long vsid, int local,
551 unsigned long trap)
552{
553 pte_t *ptep;
554 unsigned long old_pte, new_pte;
555 unsigned long va, rflags, pa, sz;
556 long slot;
557 int err = 1;
558 int ssize = user_segment_size(ea);
559 unsigned int mmu_psize;
560 int shift;
561 mmu_psize = get_slice_psize(mm, ea);
562
563 if (!mmu_huge_psizes[mmu_psize])
564 goto out;
565 ptep = huge_pte_offset(mm, ea);
566
567
568 va = hpt_va(ea, vsid, ssize);
569
570
571
572
573
574 if (unlikely(!ptep || pte_none(*ptep)))
575 goto out;
576
577
578
579
580
581 if (unlikely(access & ~pte_val(*ptep)))
582 goto out;
583
584
585
586
587
588
589
590
591
592
593
594
595
596 do {
597 old_pte = pte_val(*ptep);
598 if (old_pte & _PAGE_BUSY)
599 goto out;
600 new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
601 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
602 old_pte, new_pte));
603
604 rflags = 0x2 | (!(new_pte & _PAGE_RW));
605
606 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
607 shift = mmu_psize_to_shift(mmu_psize);
608 sz = ((1UL) << shift);
609 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
610
611
612 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
613 trap, sz);
614
615
616 if (unlikely(old_pte & _PAGE_HASHPTE)) {
617
618 unsigned long hash, slot;
619
620 hash = hpt_hash(va, shift, ssize);
621 if (old_pte & _PAGE_F_SECOND)
622 hash = ~hash;
623 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
624 slot += (old_pte & _PAGE_F_GIX) >> 12;
625
626 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
627 ssize, local) == -1)
628 old_pte &= ~_PAGE_HPTEFLAGS;
629 }
630
631 if (likely(!(old_pte & _PAGE_HASHPTE))) {
632 unsigned long hash = hpt_hash(va, shift, ssize);
633 unsigned long hpte_group;
634
635 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
636
637repeat:
638 hpte_group = ((hash & htab_hash_mask) *
639 HPTES_PER_GROUP) & ~0x7UL;
640
641
642#ifdef CONFIG_PPC_64K_PAGES
643 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
644#else
645 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
646#endif
647
648 rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
649 _PAGE_COHERENT | _PAGE_GUARDED));
650
651
652 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
653 mmu_psize, ssize);
654
655
656 if (unlikely(slot == -1)) {
657 hpte_group = ((~hash & htab_hash_mask) *
658 HPTES_PER_GROUP) & ~0x7UL;
659 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
660 HPTE_V_SECONDARY,
661 mmu_psize, ssize);
662 if (slot == -1) {
663 if (mftb() & 0x1)
664 hpte_group = ((hash & htab_hash_mask) *
665 HPTES_PER_GROUP)&~0x7UL;
666
667 ppc_md.hpte_remove(hpte_group);
668 goto repeat;
669 }
670 }
671
672 if (unlikely(slot == -2))
673 panic("hash_huge_page: pte_insert failed\n");
674
675 new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
676 }
677
678
679
680
681 *ptep = __pte(new_pte & ~_PAGE_BUSY);
682
683 err = 0;
684
685 out:
686 return err;
687}
688
689static void __init set_huge_psize(int psize)
690{
691
692
693 if (mmu_psize_defs[psize].shift &&
694 mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
695 (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
696 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
697 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
698
699
700 if (mmu_huge_psizes[psize] ||
701 mmu_psize_defs[psize].shift == PAGE_SHIFT)
702 return;
703 hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
704
705 switch (mmu_psize_defs[psize].shift) {
706 case PAGE_SHIFT_64K:
707
708
709
710 hugepte_shift[psize] = PMD_SHIFT;
711 break;
712 case PAGE_SHIFT_16M:
713
714
715 if (PAGE_SHIFT == PAGE_SHIFT_64K)
716 hugepte_shift[psize] = PMD_SHIFT;
717 else
718 hugepte_shift[psize] = PUD_SHIFT;
719 break;
720 case PAGE_SHIFT_16G:
721
722 hugepte_shift[psize] = PGDIR_SHIFT;
723 break;
724 }
725 hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
726 } else
727 hugepte_shift[psize] = 0;
728}
729
730static int __init hugepage_setup_sz(char *str)
731{
732 unsigned long long size;
733 int mmu_psize;
734 int shift;
735
736 size = memparse(str, &str);
737
738 shift = __ffs(size);
739 mmu_psize = shift_to_mmu_psize(shift);
740 if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
741 set_huge_psize(mmu_psize);
742 else
743 printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
744
745 return 1;
746}
747__setup("hugepagesz=", hugepage_setup_sz);
748
749static int __init hugetlbpage_init(void)
750{
751 unsigned int psize;
752
753 if (!cpu_has_feature(CPU_FTR_16M_PAGE))
754 return -ENODEV;
755
756
757
758
759
760 set_huge_psize(MMU_PAGE_16M);
761 set_huge_psize(MMU_PAGE_16G);
762
763
764
765
766#ifndef CONFIG_SPU_FS_64K_LS
767 set_huge_psize(MMU_PAGE_64K);
768#endif
769
770 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
771 if (mmu_huge_psizes[psize]) {
772 pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
773 kmem_cache_create(
774 HUGEPTE_CACHE_NAME(psize),
775 HUGEPTE_TABLE_SIZE(psize),
776 HUGEPTE_TABLE_SIZE(psize),
777 0,
778 NULL);
779 if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
780 panic("hugetlbpage_init(): could not create %s"\
781 "\n", HUGEPTE_CACHE_NAME(psize));
782 }
783 }
784
785 return 0;
786}
787
788module_init(hugetlbpage_init);
789