#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;

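/*
 * stage2_apply_range() - Apply @fn to the stage-2 page tables covering the
 * guest physical range [addr, end), splitting the walk at stage-2 PGD
 * boundaries. When @resched is true, the mmu_lock may be dropped between
 * sections via cond_resched_lock() to bound lock hold times on large ranges.
 */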
static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_pgd_addr_end(kvm, addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}

#define stage2_apply_range_resched(kvm, addr, end, fn)			\
	stage2_apply_range(kvm, addr, end, fn, true)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

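/*
 * kvm_flush_remote_tlbs() - Flush all stage-2 TLB entries for this VM's VMID
 * via the __kvm_tlb_flush_vmid hypercall.
 */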
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_is_map_memory(pfn);
}

static void *stage2_memcache_zalloc_page(void *arg)
{
	struct kvm_mmu_memory_cache *mc = arg;

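	/*
	 * The memcache feeding this is topped up with __GFP_ZERO allocations
	 * (see e.g. the cache set up in kvm_phys_addr_ioremap()), so the page
	 * does not need to be zeroed again here.
	 */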
	return kvm_mmu_memory_cache_alloc(mc);
}

static void *kvm_host_zalloc_pages_exact(size_t size)
{
	return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}

static void kvm_host_get_page(void *addr)
{
	get_page(virt_to_page(addr));
}

static void kvm_host_put_page(void *addr)
{
	put_page(virt_to_page(addr));
}

static int kvm_host_page_count(void *addr)
{
	return page_count(virt_to_page(addr));
}

static phys_addr_t kvm_host_pa(void *addr)
{
	return __pa(addr);
}

static void *kvm_host_va(phys_addr_t phys)
{
	return __va(phys);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(va, size);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(va, size);
}

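/*
 * __unmap_stage2_range() - Clear the stage-2 page table entries covering the
 * IPA range [start, start + size). The caller must hold kvm->mmu_lock; when
 * @may_block is true the walk may reschedule between stage-2 PGD sections.
 */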
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	phys_addr_t end = start + size;

	assert_spin_locked(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
}

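/*
 * stage2_flush_vm() - Walk every memslot and perform cache maintenance on
 * all memory already mapped at stage 2, so the guest observes consistent
 * data when its MMU or caches are toggled.
 */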
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

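/* free_hyp_pgds() - Tear down and free the host-owned hyp (EL2) page tables. */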
void free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
		hyp_pgtable = NULL;
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static bool kvm_host_owns_hyp_mappings(void)
{
	if (static_branch_likely(&kvm_protected_mode_initialized))
		return false;

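	/*
	 * This can happen early during boot, when __create_hyp_mappings() is
	 * called after hyp protection has been enabled but before the
	 * kvm_protected_mode_initialized static key has been flipped.
	 */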
	if (!hyp_pgtable && is_protected_kvm_enabled())
		return false;

	WARN_ON(!hyp_pgtable);

	return true;
}

static int __create_hyp_mappings(unsigned long start, unsigned long size,
				 unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	if (!kvm_host_owns_hyp_mappings()) {
		return kvm_call_hyp_nvhe(__pkvm_create_mappings,
					 start, size, phys, prot);
	}

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

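/*
 * create_hyp_mappings() - Map the kernel VA range [from, to) into the hyp
 * (EL2) page tables with protection @prot, at the address given by the
 * kern_hyp_va() translation. Physical addresses are resolved page by page so
 * that vmalloc regions are handled correctly. This is a no-op when the
 * kernel already runs at EL2 (VHE).
 */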
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long base;
	int ret = 0;

	if (!kvm_host_owns_hyp_mappings()) {
		base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_OR_NULL((void *)base))
			return PTR_ERR((void *)base);
		*haddr = base;

		return 0;
	}

	mutex_lock(&kvm_hyp_pgd_mutex);

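	/*
	 * The private VA allocator grows downwards from io_map_base, which
	 * assumes there is enough room below the idmap page for the new
	 * area. The allocated size is always rounded up to a whole number of
	 * pages, including the intra-page offset of phys_addr.
	 */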
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

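	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by allocating the
	 * new area, which would indicate that the downwards allocation has
	 * overflowed the idmap/IO address range.
	 */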
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	ret = __create_hyp_mappings(base, size, phys_addr, prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);
out:
	return ret;
}

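/*
 * create_hyp_io_mappings() - ioremap an MMIO region for the kernel and also
 * map it into the hyp private VA range, returning both addresses. With VHE
 * the kernel mapping is reused directly, since the kernel already runs at EL2.
 */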
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

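/*
 * create_hyp_exec_mappings() - Map a physical range into the hyp private VA
 * range with executable permission. Only meaningful on nVHE, hence the
 * BUG_ON() when the kernel itself runs at EL2.
 */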
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
	.free_pages_exact	= free_pages_exact,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_host_put_page,
	.page_count		= kvm_host_page_count,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
	.dcache_clean_inval_poc	= clean_dcache_guest_page,
	.icache_inval_pou	= invalidate_icache_guest_page,
};

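/*
 * kvm_init_stage2_mmu() - Initialise a stage-2 MMU structure for a VM:
 * allocate the stage-2 page-table descriptor and root tables, and reset the
 * per-CPU last_vcpu_ran tracking. No locking is needed since this runs once,
 * at VM creation time.
 */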
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	int cpu, err;
	struct kvm_pgtable *pgt;

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	if (!pgt)
		return -ENOMEM;

	err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	mmu->arch = &kvm->arch;
	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	mmu->vmid.vmid_gen = 0;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

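	/*
	 * A memory region may cover multiple VMAs (and any holes between
	 * them), so iterate over every VMA intersecting the region and unmap
	 * the guest IPA range it backs, skipping VM_PFNMAP (device) VMAs.
	 */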
	do {
		struct vm_area_struct *vma;
		hva_t vm_start, vm_end;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

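/*
 * stage2_unmap_vm() - Unmap stage-2 RAM mappings for the whole VM by walking
 * every memslot. Regular RAM is unmapped so it gets faulted in again later,
 * while VM_PFNMAP regions are left alone.
 */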
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}

void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	struct kvm_pgtable *pgt = NULL;

	spin_lock(&kvm->mmu_lock);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	spin_unlock(&kvm->mmu_lock);

	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}

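/*
 * kvm_phys_addr_ioremap() - Map a contiguous physical (device) region into
 * the guest IPA space at @guest_ipa, page by page, as device memory that is
 * read-only unless @writable. The memory cache is topped up before each
 * mapping so the stage-2 walker never has to allocate under mmu_lock.
 */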
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |
				     (writable ? KVM_PGTABLE_PROT_W : 0);

	size += offset_in_page(guest_ipa);
	guest_ipa &= PAGE_MASK;

	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
		ret = kvm_mmu_topup_memory_cache(&cache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			break;

		spin_lock(&kvm->mmu_lock);
		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
					     &cache);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			break;

		pa += PAGE_SIZE;
	}

	kvm_mmu_free_memory_cache(&cache);
	return ret;
}

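/*
 * stage2_wp_range() - Write-protect the stage-2 entries covering the guest
 * physical range [addr, end). The caller must hold kvm->mmu_lock and is
 * responsible for flushing the TLBs afterwards.
 */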
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
}

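/*
 * kvm_mmu_wp_memory_region() - Write-protect every stage-2 entry backing the
 * given memslot and flush the TLBs, so that subsequent guest writes fault
 * and can be tracked when dirty logging is enabled.
 */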
static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start, end;

	if (WARN_ON_ONCE(!memslot))
		return;

	start = memslot->base_gfn << PAGE_SHIFT;
	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(&kvm->arch.mmu, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

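/*
 * kvm_mmu_write_protect_pt_masked() - Write-protect the pages selected by
 * @mask within the 64-page block starting at @gfn_offset in @slot, as part
 * of dirty-log collection. The caller holds kvm->mmu_lock and takes care of
 * the TLB flush.
 */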
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(&kvm->arch.mmu, start, end);
}

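/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - Re-enable dirty logging for
 * the selected pages by write-protecting them; called by the generic
 * dirty-log code after it clears their dirty bits.
 */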
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}

static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
					       unsigned long hva,
					       unsigned long map_size)
{
	gpa_t gpa_start;
	hva_t uaddr_start, uaddr_end;
	size_t size;

	if (map_size == PAGE_SIZE)
		return true;

	size = memslot->npages * PAGE_SIZE;

	gpa_start = memslot->base_gfn << PAGE_SHIFT;

	uaddr_start = memslot->userspace_addr;
	uaddr_end = uaddr_start + size;

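	/*
	 * To safely use a block mapping of size map_size, the offset of the
	 * memslot's guest physical base relative to its userspace address
	 * must be aligned to map_size, and the faulting address, rounded
	 * down to a map_size boundary, must still fall entirely within the
	 * memslot's userspace range. Otherwise a block entry could map
	 * memory that lies outside the memslot.
	 */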
	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}

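/*
 * transparent_hugepage_adjust() - If the faulting page is part of a
 * transparent huge page (THP) and the memslot allows a PMD-sized block
 * mapping, back the pfn and IPA down to the start of the PMD block and
 * return PMD_SIZE; otherwise fall back to PAGE_SIZE. Only PMD-sized THPs
 * are handled.
 */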
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;

	if (kvm_is_transparent_hugepage(pfn) &&
	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		kvm_get_pfn(pfn);
		*pfnp = pfn;

		return PMD_SIZE;
	}

	return PAGE_SIZE;
}

static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
{
	unsigned long pa;

	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
		return huge_page_shift(hstate_vma(vma));

	if (!(vma->vm_flags & VM_PFNMAP))
		return PAGE_SHIFT;

	VM_BUG_ON(is_vm_hugetlb_page(vma));

	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);

#ifndef __PAGETABLE_PMD_FOLDED
	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
		return PUD_SHIFT;
#endif

	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
		return PMD_SHIFT;

	return PAGE_SHIFT;
}

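/*
 * sanitise_mte_tags() - For MTE-enabled VMs, make sure each page about to be
 * mapped into the guest has had its allocation tags cleared and has been
 * marked PG_mte_tagged, so stale tags never leak into the guest. Returns
 * -EFAULT when the pfn is not backed by an online struct page
 * (pfn_to_online_page() returns NULL), since such pages cannot be sanitised
 * here.
 */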
static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
			     unsigned long size)
{
	unsigned long i, nr_pages = size >> PAGE_SHIFT;
	struct page *page;

	if (!kvm_has_mte(kvm))
		return 0;

	page = pfn_to_online_page(pfn);

	if (!page)
		return -EFAULT;

	for (i = 0; i < nr_pages; i++, page++) {
		if (!test_bit(PG_mte_tagged, &page->flags)) {
			mte_clear_page_tags(page_address(page));
			set_bit(PG_mte_tagged, &page->flags);
		}
	}

	return 0;
}

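/*
 * user_mem_abort() - Handle a stage-2 translation or permission fault on an
 * IPA backed by a memslot: resolve the hva to a pfn, pick the largest safe
 * mapping granule (page, THP/PMD or hugetlb block), honour dirty-logging and
 * MTE constraints, and install or relax the stage-2 mapping.
 */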
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool exec_fault;
	bool device = false;
	bool shared;
	unsigned long mmu_seq;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	short vma_shift;
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	unsigned long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;

	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/*
	 * Look up the VMA to find out whether the fault can be served by a
	 * hugetlbfs huge page or by a block mapping of a device MMIO region.
	 */
	mmap_read_lock(current->mm);
	vma = vma_lookup(current->mm, hva);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		mmap_read_unlock(current->mm);
		return -EFAULT;
	}

	/*
	 * logging_active is guaranteed to never be true for VM_PFNMAP
	 * memslots.
	 */
	if (logging_active) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	} else {
		vma_shift = get_vma_page_shift(vma, hva);
	}

	shared = (vma->vm_flags & VM_SHARED);

	switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
			break;
		fallthrough;
#endif
	case CONT_PMD_SHIFT:
		vma_shift = PMD_SHIFT;
		fallthrough;
	case PMD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
			break;
		fallthrough;
	case CONT_PTE_SHIFT:
		vma_shift = PAGE_SHIFT;
		force_pte = true;
		fallthrough;
	case PAGE_SHIFT:
		break;
	default:
		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
	}

	vma_pagesize = 1UL << vma_shift;
	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
		fault_ipa &= ~(vma_pagesize - 1);

	gfn = fault_ipa >> PAGE_SHIFT;
	mmap_read_unlock(current->mm);

	/*
	 * Permission faults just need to update the existing leaf entry,
	 * and so normally don't require allocations from the memcache. The
	 * only exception to this is when dirty logging is enabled at runtime
	 * and a write fault needs to collapse a block entry into a table.
	 */
	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
		ret = kvm_mmu_topup_memory_cache(memcache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			return ret;
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Read mmu_notifier_seq before taking the page reference below, so
	 * that a concurrent MMU-notifier invalidation running between the
	 * pfn lookup and the final mmu_lock critical section is caught by
	 * the mmu_notifier_retry() check. The smp_rmb() pairs with the
	 * barrier in the MMU-notifier invalidation path.
	 */
	smp_rmb();

	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
				   write_fault, &writable, NULL);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma_shift);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		/*
		 * The pfn is not backed by normal, cacheable memory: map it
		 * as device memory and do not let it be upgraded to a block
		 * mapping by transparent_hugepage_adjust() below.
		 */
		device = true;
	} else if (logging_active && !write_fault) {
		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		writable = false;
	}

	if (exec_fault && device)
		return -ENOEXEC;

	spin_lock(&kvm->mmu_lock);
	pgt = vcpu->arch.hw_mmu->pgt;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/*
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
							   &pfn, &fault_ipa);

	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
		if (!shared)
			ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
		else
			ret = -EFAULT;
		if (ret)
			goto out_unlock;
	}

	if (writable)
		prot |= KVM_PGTABLE_PROT_W;

	if (exec_fault)
		prot |= KVM_PGTABLE_PROT_X;

	if (device)
		prot |= KVM_PGTABLE_PROT_DEVICE;
	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
		prot |= KVM_PGTABLE_PROT_X;

	/*
	 * A permission fault on a leaf entry of the right size only needs
	 * its permissions relaxed. Anything else (including collapsing or
	 * splitting a block mapping) must go through kvm_pgtable_stage2_map().
	 */
	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
	} else {
		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
					     __pfn_to_phys(pfn), prot,
					     memcache);
	}

	/* Mark the page dirty only if the fault is handled successfully */
	if (writable && !ret) {
		kvm_set_pfn_dirty(pfn);
		mark_page_dirty_in_slot(kvm, memslot, gfn);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret != -EAGAIN ? ret : 0;
}

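/* Resolve an access-flag fault by marking the stage-2 entry young again. */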
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pte_t pte;
	kvm_pte_t kpte;
	struct kvm_s2_mmu *mmu;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);
	mmu = vcpu->arch.hw_mmu;
	kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
	spin_unlock(&vcpu->kvm->mmu_lock);

	pte = __pte(kpte);
	if (pte_valid(pte))
		kvm_set_pfn_accessed(pte_pfn(pte));
}

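/*
 * kvm_handle_guest_abort() - Entry point for stage-2 aborts taken from the
 * guest: synchronous external aborts are handled (or injected back), faults
 * on memslot-backed addresses go to user_mem_abort(), access-flag faults are
 * resolved in place, and faults outside any memslot are treated as MMIO.
 * Returns 1 when the guest can be resumed, otherwise the value to propagate
 * from the run loop (0 for an exit to userspace, negative on error).
 */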
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	/* Synchronous External Abort? */
	if (kvm_vcpu_abt_issea(vcpu)) {
		/*
		 * For RAS the host kernel may handle this abort.
		 * There is no need to pass the error into the guest.
		 */
		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
			kvm_inject_vabt(vcpu);

		return 1;
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Only translation, permission and access faults are handled here */
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_esr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		/*
		 * The guest has put either its instructions or its
		 * page-tables somewhere it shouldn't have. Userspace won't
		 * be able to do anything about this, so re-inject the abort
		 * back into the guest.
		 */
		if (is_iabt) {
			ret = -ENOEXEC;
			goto out;
		}

		if (kvm_vcpu_abt_iss1tw(vcpu)) {
			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we ended up
		 * here, we know the range is outside of any memory slot and
		 * therefore cannot be cached, so assume the guest is just
		 * being cautious and skip the instruction.
		 */
		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_incr_pc(vcpu);
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to complement
		 * it with the bottom 12 bits from the faulting VA. This is
		 * always 12 bits, irrespective of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out:
	if (ret == -ENOEXEC) {
		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
		ret = 1;
	}
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	if (!kvm->arch.mmu.pgt)
		return false;

	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
			     (range->end - range->start) << PAGE_SHIFT,
			     range->may_block);

	return false;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	kvm_pfn_t pfn = pte_pfn(range->pte);
	int ret;

	if (!kvm->arch.mmu.pgt)
		return false;

	WARN_ON(range->end - range->start != 1);

	ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
	if (ret)
		return false;

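	/*
	 * The page has been remapped (e.g. by CoW), so treat this like a
	 * translation fault and install a fresh read-only mapping; the map
	 * walker takes care of the required cache maintenance. The MMU
	 * notifiers will already have unmapped any huge mapping covering
	 * this gfn before ->change_pte() is called, so a single PAGE_SIZE
	 * entry with no memcache is sufficient.
	 */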
	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
			       PAGE_SIZE, __pfn_to_phys(pfn),
			       KVM_PGTABLE_PROT_R, NULL);

	return false;
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	u64 size = (range->end - range->start) << PAGE_SHIFT;
	kvm_pte_t kpte;
	pte_t pte;

	if (!kvm->arch.mmu.pgt)
		return false;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);

	kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
					range->start << PAGE_SHIFT);
	pte = __pte(kpte);
	return pte_valid(pte) && pte_young(pte);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	if (!kvm->arch.mmu.pgt)
		return false;

	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
					   range->start << PAGE_SHIFT);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	return __pa(hyp_pgtable->pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

static int kvm_map_idmap_text(void)
{
	unsigned long size = hyp_idmap_end - hyp_idmap_start;
	int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
					PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}

static void *kvm_hyp_zalloc_page(void *arg)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
	.zalloc_page		= kvm_hyp_zalloc_page,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_host_put_page,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
};

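/*
 * kvm_mmu_init() - Set up the host-owned hyp (EL2) page tables: compute the
 * idmap of the EL2 init code, check that it does not clash with the hyp VA
 * range, map it executable at EL2, and seed io_map_base for the private VA
 * allocator.
 */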
int kvm_mmu_init(u32 *hyp_va_bits)
{
	int err;

	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
	hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
	hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	*hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
	kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_debug("HYP VA range: %lx:%lx\n",
		  kern_hyp_va(PAGE_OFFSET),
		  kern_hyp_va((unsigned long)high_memory - 1));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start <  kern_hyp_va((unsigned long)high_memory - 1) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		/*
		 * The idmap page is intersecting with the VA space,
		 * it is not safe to continue further.
		 */
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
	if (!hyp_pgtable) {
		kvm_err("Hyp mode page-table not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops);
	if (err)
		goto out_free_pgtable;

	err = kvm_map_idmap_text();
	if (err)
		goto out_destroy_pgtable;

	io_map_base = hyp_idmap_start;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_hyp_destroy(hyp_pgtable);
out_free_pgtable:
	kfree(hyp_pgtable);
	hyp_pgtable = NULL;
out:
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point the memslot has been committed and there is an
	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		/*
		 * With initial-all-set, no pages need to be write protected
		 * here because they are all reported as dirty already; they
		 * will be write protected gradually as the log is cleared.
		 */
		if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
			kvm_mmu_wp_memory_region(kvm, mem->slot);
	}
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the
	 * IPA space addressable by the guest.
	 */
	if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
		return -EFAULT;

	mmap_read_lock(current->mm);
	/*
	 * A memory region could cover multiple VMAs (and any holes between
	 * them), so iterate over all of them to find out if we can map any
	 * of them right now.
	 */
	do {
		struct vm_area_struct *vma;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		/*
		 * VM_SHARED mappings are not allowed with MTE to avoid races
		 * when updating the PG_mte_tagged page flag, see
		 * sanitise_mte_tags for more details.
		 *
		 * Break out (rather than return directly) so that the
		 * mmap_lock taken above is released on the way out.
		 */
		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
			ret = -EINVAL;
			break;
		}

		if (vma->vm_flags & VM_PFNMAP) {
			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				break;
			}
		}
		hva = min(reg_end, vma->vm_end);
	} while (hva < reg_end);

	mmap_read_unlock(current->mm);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

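/*
 * Guest "set/way" cache maintenance operations cannot be virtualized
 * faithfully: they act on the physical caches seen by the CPU rather than on
 * an address range, and the vCPU may migrate mid-sequence. The best effort
 * is to flush the whole guest memory when the first S/W operation is
 * trapped, then trap VM register accesses (HCR_TVM) until the MMU and caches
 * are turned back on, at which point everything is cleaned again.
 */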
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = *vcpu_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		*vcpu_hcr(vcpu) &= ~HCR_TVM;

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}