#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/mm.h>
#include <linux/stop_machine.h>

#include <asm/sections.h>
#include <asm/mmu.h>
#include <asm/tlb.h>

#include <mm/mmu_decl.h>

#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>

#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
#warning Limited user VSID range means pagetable space is wasted
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * vmemmap is the starting address of the virtual address space where
 * struct pages are allocated for all possible PFNs present on the system,
 * including holes and bad memory (hence sparse). These virtual struct
 * pages are stored in sequence in this virtual address space irrespective
 * of whether the corresponding PFN is valid or not, which gives a constant
 * relationship between the address of a struct page and its PFN.
 *
 * During boot or memory hotplug, when a new memory section is added,
 * physical memory allocation (including hash table bolting) is performed
 * only for the set of struct pages that are part of that section. This
 * saves memory by not allocating struct pages for PFNs which are not valid.
 */
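/*
 * Create a bolted hash page table mapping for one chunk of the vmemmap:
 * map the virtual range [start, start + page_size) to physical address
 * @phys using the vmemmap page size. Returns 0 on success, negative on
 * failure (any partially created mapping is removed again).
 */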
int __meminit hash__vmemmap_create_mapping(unsigned long start,
					   unsigned long page_size,
					   unsigned long phys)
{
	int rc;

	if ((start + page_size) >= H_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	rc = htab_bolt_mapping(start, start + page_size, phys,
			       pgprot_val(PAGE_KERNEL),
			       mmu_vmemmap_psize, mmu_kernel_ssize);
	if (rc < 0) {
		int rc2 = htab_remove_mapping(start, start + page_size,
					      mmu_vmemmap_psize,
					      mmu_kernel_ssize);
		BUG_ON(rc2 && (rc2 != -ENOENT));
	}
	return rc;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void hash__vmemmap_remove_mapping(unsigned long start,
				  unsigned long page_size)
{
	int rc = htab_remove_mapping(start, start + page_size,
				     mmu_vmemmap_psize,
				     mmu_kernel_ssize);
	BUG_ON((rc < 0) && (rc != -ENOENT));
	WARN_ON(rc == -ENOENT);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

/*
 * hash__map_kernel_page() creates a kernel mapping for a single page (used
 * by ioremap, among others). Once the slab allocator is up it populates the
 * Linux page tables; before that it bolts an entry directly into the hash
 * page table.
 */
int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
	if (slab_is_available()) {
		pgdp = pgd_offset_k(ea);
		p4dp = p4d_offset(pgdp, ea);
		pudp = pud_alloc(&init_mm, p4dp, ea);
		if (!pudp)
			return -ENOMEM;
		pmdp = pmd_alloc(&init_mm, pudp, ea);
		if (!pmdp)
			return -ENOMEM;
		ptep = pte_alloc_kernel(pmdp, ea);
		if (!ptep)
			return -ENOMEM;
		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
	} else {
		/*
		 * If the mm subsystem is not fully up, we cannot create a
		 * Linux page table entry for this mapping. Simply bolt an
		 * entry in the hardware hash page table instead.
		 */
		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
				      mmu_io_psize, mmu_kernel_ssize)) {
			printk(KERN_ERR "Failed to do bolted mapping IO "
			       "memory at %016lx !\n", pa);
			return -ENOMEM;
		}
	}

	smp_wmb();
	return 0;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

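/*
 * Atomically clear and/or set flag bits in a hugepage PMD entry, using
 * ldarx/stdcx. and retrying while H_PAGE_BUSY is set so we never race with
 * a concurrent hash fault. Returns the old PMD value and, if it carried
 * hash PTEs (H_PAGE_HASHPTE), flushes them from the hash table.
 */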
unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					pmd_t *pmdp, unsigned long clr,
					unsigned long set)
{
	__be64 old_be, tmp;
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3\n\
		and.	%1,%0,%6\n\
		bne-	1b \n\
		andc	%1,%0,%4 \n\
		or	%1,%1,%7\n\
		stdcx.	%1,0,%3 \n\
		bne-	1b"
	: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
	: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
	: "cc" );

	old = be64_to_cpu(old_be);

	trace_hugepage_update(addr, old, clr, set);
	if (old & H_PAGE_HASHPTE)
		hpte_do_hugepage_flush(mm, addr, pmdp, old);
	return old;
}

pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
				pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));

	pmd = *pmdp;
	pmd_clear(pmdp);
	/*
	 * Wait for all pending hash_page faults to finish. This is needed
	 * in case of subpage collapse. When we collapse normal pages to a
	 * hugepage, we first clear the pmd, then invalidate all the PTE
	 * entries. The assumption here is that any low level page fault will
	 * see a none pmd and take the slow path that waits on mmap_lock.
	 * But we could very well be in a hash_page with a local ptep pointer
	 * value. Such a hash_page can result in adding new HPTE entries for
	 * normal subpages, which means we could be modifying the page content
	 * while copying it to a huge page. So wait for parallel hash_page
	 * to finish before invalidating the HPTE entries. We can do this by
	 * sending an IPI to all the cpus and executing a dummy function there.
	 */
	serialize_against_pte_lookup(vma->vm_mm);
	/*
	 * Now invalidate the hpte entries in the range covered by the pmd.
	 * This makes sure we take a fault and find the pmd as none, which
	 * results in a major fault that takes mmap_lock and hence waits for
	 * the collapse to complete. Without this, __collapse_huge_page_copy()
	 * could copy the old content.
	 */
	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
	return pmd;
}

/*
 * We want to put the deposited page table in the second half of the PMD and
 * use it for tracking the base page size hash PTEs.
 */
void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				      pgtable_t pgtable)
{
	pgtable_t *pgtable_slot;

	assert_spin_locked(pmd_lockptr(mm, pmdp));
	/*
	 * We store the pgtable in the second half of the PMD.
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	*pgtable_slot = pgtable;
	/*
	 * Expose the deposited pgtable to other cpus: before we set the
	 * hugepage PTE at the pmd level, the pgtable must be fully
	 * initialized and visible.
	 */
	smp_wmb();
}

pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;
	pgtable_t *pgtable_slot;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Once we withdraw, mark the deposit slot NULL.
	 */
	*pgtable_slot = NULL;
	/*
	 * We store HPTE information in the deposited PTE fragment.
	 * Zero out the contents on withdraw.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	return pgtable;
}

/*
 * A Linux hugepage PMD was changed and the corresponding hash table entries
 * need to be flushed.
 */
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
			    pmd_t *pmdp, unsigned long old_pmd)
{
	int ssize;
	unsigned int psize;
	unsigned long vsid;
	unsigned long flags = 0;

	/* Get the base page size, vsid and segment size */
#ifdef CONFIG_DEBUG_VM
	psize = get_slice_psize(mm, addr);
	BUG_ON(psize == MMU_PAGE_16M);
#endif
	if (old_pmd & H_PAGE_COMBO)
		psize = MMU_PAGE_4K;
	else
		psize = MMU_PAGE_64K;

	if (!is_kernel_addr(addr)) {
		ssize = user_segment_size(addr);
		vsid = get_user_vsid(&mm->context, addr, ssize);
		WARN_ON(vsid == 0);
	} else {
		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}

	if (mm_is_thread_local(mm))
		flags |= HPTE_LOCAL_UPDATE;

	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}

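/*
 * Clear a hugepage PMD and return its old value. Since the PMD is now none
 * and we hold the PMD lock, it is also safe to wipe the hash-index metadata
 * kept in the deposited page table fragment.
 */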
pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	pgtable_t pgtable;
	unsigned long old;
	pgtable_t *pgtable_slot;

	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * We have pmd == none and we are holding page_table_lock.
	 * So we can safely go and clear the pgtable hash index info.
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Zero out the old valid bit and hash index details so that a
	 * parallel hash fault does not look at stale information.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	return old_pmd;
}

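/*
 * THP is only supported on hash if 16MB pages are available, PMD_SHIFT
 * matches the 16MB page shift, and a hash encoding (penc) for 16MB pages
 * exists for the base page size in use (64K if available, otherwise 4K).
 */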
int hash__has_transparent_hugepage(void)
{
	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return 0;
	/*
	 * We support THP only if PMD_SIZE is 16MB.
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
		return 0;
	/*
	 * We need to make sure that we support a 16MB hugepage in a segment
	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
	 * of 64K. If we have a 64K HPTE encoding, we will be using that by
	 * default.
	 */
	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
		return 0;
	/*
	 * Ok, we only have 4K HPTEs.
	 */
	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_STRICT_KERNEL_RWX

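/*
 * Changing the protection of the kernel's bolted HPTEs is done under
 * stop_machine() when running under an LPAR: the secondary CPUs drop to
 * real mode (MSR[IR]/[DR] clear) and spin, so no other CPU is translating
 * through the hash table while the master CPU rewrites the bolted entries.
 */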
struct change_memory_parms {
	unsigned long start, end, newpp;
	unsigned int step, nr_cpus, master_cpu;
	atomic_t cpu_counter;
};

// We'd rather this was on the stack but it has to be in the RMO
static struct change_memory_parms chmem_parms;

// And therefore protected by a lock
static DEFINE_MUTEX(chmem_lock);

static void change_memory_range(unsigned long start, unsigned long end,
				unsigned int step, unsigned long newpp)
{
	unsigned long idx;

	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
		 start, end, newpp, step);

	for (idx = start; idx < end; idx += step)
		/* Not sure if we can do much with the return value */
		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
						 mmu_kernel_ssize);
}

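/*
 * Executed under stop_machine() by every CPU except the master: switch to
 * real mode so this CPU performs no hash table translations, decrement the
 * shared counter to signal arrival, spin until the master drops it to zero,
 * then return to virtual mode.
 */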
static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
{
	unsigned long msr, tmp, flags;
	int *p;

	p = &parms->cpu_counter.counter;

	local_irq_save(flags);
	hard_irq_disable();

	asm volatile (
	// Switch to real mode and leave interrupts off
	"mfmsr	%[msr]			;"
	"li	%[tmp], %[MSR_IR_DR]	;"
	"andc	%[tmp], %[msr], %[tmp]	;"
	"mtmsrd	%[tmp]			;"

	// Tell the master we are in real mode
	"1:				"
	"lwarx	%[tmp], 0, %[p]		;"
	"addic	%[tmp], %[tmp], -1	;"
	"stwcx.	%[tmp], 0, %[p]		;"
	"bne-	1b			;"

	// Spin until the counter goes to zero
	"2:				;"
	"lwz	%[tmp], 0(%[p])		;"
	"cmpwi	%[tmp], 0		;"
	"bne-	2b			;"

	// Switch back to virtual mode
	"mtmsrd	%[msr]			;"

	: // outputs
	  [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
	: // inputs
	  [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
	: // clobbers
	  "cc", "xer"
	);

	local_irq_restore(flags);

	return 0;
}

static int change_memory_range_fn(void *data)
{
	struct change_memory_parms *parms = data;

	if (parms->master_cpu != smp_processor_id())
		return chmem_secondary_loop(parms);

	// Wait for all but one CPU (this one) to call-in
	while (atomic_read(&parms->cpu_counter) > 1)
		barrier();

	change_memory_range(parms->start, parms->end, parms->step, parms->newpp);

	mb();

	// Signal the other CPUs that we're done
	atomic_dec(&parms->cpu_counter);

	return 0;
}

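/*
 * Update the protection bits of the bolted HPTEs covering [start, end),
 * rounded out to the linear mapping page size. On bare metal this is done
 * directly; under an LPAR all online CPUs are corralled via stop_machine()
 * as described above. Returns false if the rounded range is empty.
 */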
static bool hash__change_memory_range(unsigned long start, unsigned long end,
				      unsigned long newpp)
{
	unsigned int step, shift;

	shift = mmu_psize_defs[mmu_linear_psize].shift;
	step = 1 << shift;

	start = ALIGN_DOWN(start, step);
	end = ALIGN(end, step); // aligns up

	if (start >= end)
		return false;

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		mutex_lock(&chmem_lock);

		chmem_parms.start = start;
		chmem_parms.end = end;
		chmem_parms.step = step;
		chmem_parms.newpp = newpp;
		chmem_parms.master_cpu = smp_processor_id();

		cpus_read_lock();

		atomic_set(&chmem_parms.cpu_counter, num_online_cpus());

		// Ensure state is consistent before we call the other CPUs
		mb();

		stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
					cpu_online_mask);

		cpus_read_unlock();
		mutex_unlock(&chmem_lock);
	} else
		change_memory_range(start, end, step, newpp);

	return true;
}

void hash__mark_rodata_ro(void)
{
	unsigned long start, end, pp;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);

	WARN_ON(!hash__change_memory_range(start, end, pp));
}

void hash__mark_initmem_nx(void)
{
	unsigned long start, end, pp;

	start = (unsigned long)__init_begin;
	end = (unsigned long)__init_end;

	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);

	WARN_ON(!hash__change_memory_range(start, end, pp));
}
#endif /* CONFIG_STRICT_KERNEL_RWX */