1#define pr_fmt(fmt) "SVM: " fmt
2
3#include <linux/kvm_host.h>
4
5#include "irq.h"
6#include "mmu.h"
7#include "kvm_cache_regs.h"
8#include "x86.h"
9#include "cpuid.h"
10#include "pmu.h"
11
12#include <linux/module.h>
13#include <linux/mod_devicetable.h>
14#include <linux/kernel.h>
15#include <linux/vmalloc.h>
16#include <linux/highmem.h>
17#include <linux/amd-iommu.h>
18#include <linux/sched.h>
19#include <linux/trace_events.h>
20#include <linux/slab.h>
21#include <linux/hashtable.h>
22#include <linux/objtool.h>
23#include <linux/psp-sev.h>
24#include <linux/file.h>
25#include <linux/pagemap.h>
26#include <linux/swap.h>
27#include <linux/rwsem.h>
28
29#include <asm/apic.h>
30#include <asm/perf_event.h>
31#include <asm/tlbflush.h>
32#include <asm/desc.h>
33#include <asm/debugreg.h>
34#include <asm/kvm_para.h>
35#include <asm/irq_remapping.h>
36#include <asm/spec-ctrl.h>
37#include <asm/cpu_device_id.h>
38#include <asm/traps.h>
39
40#include <asm/virtext.h>
41#include "trace.h"
42
43#include "svm.h"
44#include "svm_ops.h"
45
46#include "kvm_onhyperv.h"
47#include "svm_onhyperv.h"
48
49#define __ex(x) __kvm_handle_fault_on_reboot(x)
50
51MODULE_AUTHOR("Qumranet");
52MODULE_LICENSE("GPL");
53
54#ifdef MODULE
55static const struct x86_cpu_id svm_cpu_id[] = {
56 X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL),
57 {}
58};
59MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
60#endif
61
62#define SEG_TYPE_LDT 2
63#define SEG_TYPE_BUSY_TSS16 3
64
65#define SVM_FEATURE_LBRV (1 << 1)
66#define SVM_FEATURE_SVML (1 << 2)
67#define SVM_FEATURE_TSC_RATE (1 << 4)
68#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
69#define SVM_FEATURE_FLUSH_ASID (1 << 6)
70#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
71#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
72
73#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
74
75#define TSC_RATIO_RSVD 0xffffff0000000000ULL
76#define TSC_RATIO_MIN 0x0000000000000001ULL
77#define TSC_RATIO_MAX 0x000000ffffffffffULL
78
79static bool erratum_383_found __read_mostly;
80
81u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
82
83
84
85
86
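/*
 * OS Visible Workaround (OSVW) state exposed to guests: osvw_len and
 * osvw_status are aggregated across all host CPUs in svm_hardware_enable()
 * and copied into each vCPU by svm_init_osvw().
 */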
87static uint64_t osvw_len = 4, osvw_status;
88
89static DEFINE_PER_CPU(u64, current_tsc_ratio);
90#define TSC_RATIO_DEFAULT 0x0100000000ULL
91
92static const struct svm_direct_access_msrs {
93 u32 index;
94 bool always;
95} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
96 { .index = MSR_STAR, .always = true },
97 { .index = MSR_IA32_SYSENTER_CS, .always = true },
98 { .index = MSR_IA32_SYSENTER_EIP, .always = false },
99 { .index = MSR_IA32_SYSENTER_ESP, .always = false },
100#ifdef CONFIG_X86_64
101 { .index = MSR_GS_BASE, .always = true },
102 { .index = MSR_FS_BASE, .always = true },
103 { .index = MSR_KERNEL_GS_BASE, .always = true },
104 { .index = MSR_LSTAR, .always = true },
105 { .index = MSR_CSTAR, .always = true },
106 { .index = MSR_SYSCALL_MASK, .always = true },
107#endif
108 { .index = MSR_IA32_SPEC_CTRL, .always = false },
109 { .index = MSR_IA32_PRED_CMD, .always = false },
110 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
111 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
112 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
113 { .index = MSR_IA32_LASTINTTOIP, .always = false },
114 { .index = MSR_EFER, .always = false },
115 { .index = MSR_IA32_CR_PAT, .always = false },
116 { .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
117 { .index = MSR_INVALID, .always = false },
118};
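
/*
 * Pause-loop exiting (PLE) tuning for the PAUSE intercept.  The count and
 * threshold are programmed into the VMCB, while the grow/shrink/max values
 * control the dynamic window adjustment done in grow_ple_window() and
 * shrink_ple_window() below.  All of these are read-only module parameters.
 */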
150static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
151module_param(pause_filter_thresh, ushort, 0444);
152
153static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
154module_param(pause_filter_count, ushort, 0444);
155
156
157static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
158module_param(pause_filter_count_grow, ushort, 0444);
159
160
161static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
162module_param(pause_filter_count_shrink, ushort, 0444);
163
164
165static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
166module_param(pause_filter_count_max, ushort, 0444);
167
168
169
170
171
172bool npt_enabled = true;
173module_param_named(npt, npt_enabled, bool, 0444);
174
175
176static int nested = true;
177module_param(nested, int, S_IRUGO);
178
179
180static int nrips = true;
181module_param(nrips, int, 0444);
182
183
184static int vls = true;
185module_param(vls, int, 0444);
186
187
188static int vgif = true;
189module_param(vgif, int, 0444);
190
191
192
193
194
195static bool avic;
196module_param(avic, bool, 0444);
197
198bool __read_mostly dump_invalid_vmcb;
199module_param(dump_invalid_vmcb, bool, 0644);
200
201
202bool intercept_smi = true;
203module_param(intercept_smi, bool, 0444);
204
205
206static bool svm_gp_erratum_intercept = true;
207
208static u8 rsm_ins_bytes[] = "\x0f\xaa";
209
210static unsigned long iopm_base;
211
212struct kvm_ldttss_desc {
213 u16 limit0;
214 u16 base0;
215 unsigned base1:8, type:5, dpl:2, p:1;
216 unsigned limit1:4, zero0:3, g:1, base2:8;
217 u32 base3;
218 u32 zero1;
219} __attribute__((packed));
220
221DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
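
/*
 * Slot in the user-return MSR list used for the guest's TSC_AUX; registered
 * in svm_hardware_setup() and updated in svm_prepare_guest_switch().
 */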
230static int tsc_aux_uret_slot __read_mostly = -1;
231
232static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
233
234#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
235#define MSRS_RANGE_SIZE 2048
236#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
237
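/*
 * Each MSR in the permission map takes two bits (read and write intercept),
 * i.e. four MSRs per byte, and each of the three ranges in msrpm_ranges[]
 * covers MSRS_IN_RANGE (8192) MSRs in a 2048-byte chunk.  svm_msrpm_offset()
 * returns the offset of an MSR's bits in u32 units, or MSR_INVALID if the
 * MSR is outside the covered ranges.  For example, MSR_STAR (0xc0000081)
 * lands at byte offset 0x81 / 4 + 2048 = 0x820, i.e. u32 offset 0x208.
 */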
238u32 svm_msrpm_offset(u32 msr)
239{
240 u32 offset;
241 int i;
242
243 for (i = 0; i < NUM_MSR_MAPS; i++) {
244 if (msr < msrpm_ranges[i] ||
245 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
246 continue;
247
248 offset = (msr - msrpm_ranges[i]) / 4;
249 offset += (i * MSRS_RANGE_SIZE);
250
251
252 return offset / 4;
253 }
254
255
256 return MSR_INVALID;
257}
258
259#define MAX_INST_SIZE 15
260
261static int get_max_npt_level(void)
262{
263#ifdef CONFIG_X86_64
264 return PT64_ROOT_4LEVEL;
265#else
266 return PT32E_ROOT_LEVEL;
267#endif
268}
269
270int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
271{
272 struct vcpu_svm *svm = to_svm(vcpu);
273 u64 old_efer = vcpu->arch.efer;
274 vcpu->arch.efer = efer;
275
276 if (!npt_enabled) {
277
278 efer |= EFER_NX;
279
280 if (!(efer & EFER_LMA))
281 efer &= ~EFER_LME;
282 }
283
284 if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
285 if (!(efer & EFER_SVME)) {
286 svm_leave_nested(svm);
287 svm_set_gif(svm, true);
288
289 if (!enable_vmware_backdoor)
290 clr_exception_intercept(svm, GP_VECTOR);
291
292
293
294
295
296
297 if (!is_smm(vcpu))
298 svm_free_nested(svm);
299
300 } else {
301 int ret = svm_allocate_nested(svm);
302
303 if (ret) {
304 vcpu->arch.efer = old_efer;
305 return ret;
306 }
307
308 if (svm_gp_erratum_intercept)
309 set_exception_intercept(svm, GP_VECTOR);
310 }
311 }
312
313 svm->vmcb->save.efer = efer | EFER_SVME;
314 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
315 return 0;
316}
317
318static int is_external_interrupt(u32 info)
319{
320 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
321 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
322}
323
324static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
325{
326 struct vcpu_svm *svm = to_svm(vcpu);
327 u32 ret = 0;
328
329 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
330 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
331 return ret;
332}
333
334static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
335{
336 struct vcpu_svm *svm = to_svm(vcpu);
337
338 if (mask == 0)
339 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
340 else
341 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
342
343}
344
345static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
346{
347 struct vcpu_svm *svm = to_svm(vcpu);
348
349
350
351
352
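 /*
  * SEV-ES guests have an encrypted VMSA, so KVM can neither read nor
  * update RIP here; RIP is managed by hardware and the guest's #VC
  * handler, and only the interrupt shadow needs to be cleared.
  */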
353 if (sev_es_guest(vcpu->kvm))
354 goto done;
355
356 if (nrips && svm->vmcb->control.next_rip != 0) {
357 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
358 svm->next_rip = svm->vmcb->control.next_rip;
359 }
360
361 if (!svm->next_rip) {
362 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
363 return 0;
364 } else {
365 kvm_rip_write(vcpu, svm->next_rip);
366 }
367
368done:
369 svm_set_interrupt_shadow(vcpu, 0);
370
371 return 1;
372}
373
374static void svm_queue_exception(struct kvm_vcpu *vcpu)
375{
376 struct vcpu_svm *svm = to_svm(vcpu);
377 unsigned nr = vcpu->arch.exception.nr;
378 bool has_error_code = vcpu->arch.exception.has_error_code;
379 u32 error_code = vcpu->arch.exception.error_code;
380
381 kvm_deliver_exception_payload(vcpu);
382
383 if (nr == BP_VECTOR && !nrips) {
384 unsigned long rip, old_rip = kvm_rip_read(vcpu);
385
386
387
388
389
390
391
392
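 /*
  * Without NextRIP (!nrips) the CPU cannot skip over the INT3 when the
  * exception is injected, so emulate the skip here and record both the
  * resulting linear RIP (int3_rip) and the instruction length
  * (int3_injected) so the injection can be unwound if the #BP is not
  * actually delivered.
  */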
393 (void)skip_emulated_instruction(vcpu);
394 rip = kvm_rip_read(vcpu);
395 svm->int3_rip = rip + svm->vmcb->save.cs.base;
396 svm->int3_injected = rip - old_rip;
397 }
398
399 svm->vmcb->control.event_inj = nr
400 | SVM_EVTINJ_VALID
401 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
402 | SVM_EVTINJ_TYPE_EXEPT;
403 svm->vmcb->control.event_inj_err = error_code;
404}
405
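/*
 * Apply the suggested workaround for AMD erratum 383 (set bit 47 of
 * MSR_AMD64_DC_CFG) and note its presence so that svm_handle_mce() can
 * recognize the erratum's machine-check signature later.
 */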
406static void svm_init_erratum_383(void)
407{
408 u32 low, high;
409 int err;
410 u64 val;
411
412 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
413 return;
414
415
416 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
417 if (err)
418 return;
419
420 val |= (1ULL << 47);
421
422 low = lower_32_bits(val);
423 high = upper_32_bits(val);
424
425 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
426
427 erratum_383_found = true;
428}
429
430static void svm_init_osvw(struct kvm_vcpu *vcpu)
431{
432
433
434
435
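 /*
  * Advertise at least three OSVW ids to the guest and clear status bits
  * 1 and 2, i.e. report those errata as fixed from the guest's point of
  * view, presumably because the triggering instructions are intercepted
  * by KVM anyway.
  */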
436 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
437 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
438
439
440
441
442
443
444
445
446
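 /*
  * If an old Family 0x10 host does not publish any OSVW entries, be
  * conservative and report OSVW id 0 as "workaround required" rather
  * than claiming the erratum is fixed.
  */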
447 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
448 vcpu->arch.osvw.status |= 1;
449}
450
451static int has_svm(void)
452{
453 const char *msg;
454
455 if (!cpu_has_svm(&msg)) {
456 printk(KERN_INFO "has_svm: %s\n", msg);
457 return 0;
458 }
459
460 if (sev_active()) {
461 pr_info("KVM is unsupported when running as an SEV guest\n");
462 return 0;
463 }
464
465 if (pgtable_l5_enabled()) {
466 pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
467 return 0;
468 }
469
470 return 1;
471}
472
473static void svm_hardware_disable(void)
474{
475
476 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
477 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
478
479 cpu_svm_disable();
480
481 amd_pmu_disable_virt();
482}
483
484static int svm_hardware_enable(void)
485{
486
487 struct svm_cpu_data *sd;
488 uint64_t efer;
489 struct desc_struct *gdt;
490 int me = raw_smp_processor_id();
491
492 rdmsrl(MSR_EFER, efer);
493 if (efer & EFER_SVME)
494 return -EBUSY;
495
496 if (!has_svm()) {
497 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
498 return -EINVAL;
499 }
500 sd = per_cpu(svm_data, me);
501 if (!sd) {
502 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
503 return -EINVAL;
504 }
505
506 sd->asid_generation = 1;
507 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
508 sd->next_asid = sd->max_asid + 1;
509 sd->min_asid = max_sev_asid + 1;
510
511 gdt = get_current_gdt_rw();
512 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
513
514 wrmsrl(MSR_EFER, efer | EFER_SVME);
515
516 wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
517
518 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
519 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
520 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
521 }
522
523
524
525
526
527
528
529
530
531
532
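 /*
  * Aggregate OSVW data across all CPUs being enabled: the length is
  * clamped to the minimum, status bits are OR'ed together, and OSVW is
  * disabled entirely if any CPU fails to report it.  This copes with
  * hosts that mix processor revisions.
  */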
533 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
534 uint64_t len, status = 0;
535 int err;
536
537 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
538 if (!err)
539 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
540 &err);
541
542 if (err)
543 osvw_status = osvw_len = 0;
544 else {
545 if (len < osvw_len)
546 osvw_len = len;
547 osvw_status |= status;
548 osvw_status &= (1ULL << osvw_len) - 1;
549 }
550 } else
551 osvw_status = osvw_len = 0;
552
553 svm_init_erratum_383();
554
555 amd_pmu_enable_virt();
556
557 return 0;
558}
559
560static void svm_cpu_uninit(int cpu)
561{
562 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
563
564 if (!sd)
565 return;
566
567 per_cpu(svm_data, cpu) = NULL;
568 kfree(sd->sev_vmcbs);
569 __free_page(sd->save_area);
570 kfree(sd);
571}
572
573static int svm_cpu_init(int cpu)
574{
575 struct svm_cpu_data *sd;
576 int ret = -ENOMEM;
577
578 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
579 if (!sd)
580 return ret;
581 sd->cpu = cpu;
582 sd->save_area = alloc_page(GFP_KERNEL);
583 if (!sd->save_area)
584 goto free_cpu_data;
585
586 clear_page(page_address(sd->save_area));
587
588 ret = sev_cpu_init(sd);
589 if (ret)
590 goto free_save_area;
591
592 per_cpu(svm_data, cpu) = sd;
593
594 return 0;
595
596free_save_area:
597 __free_page(sd->save_area);
598free_cpu_data:
599 kfree(sd);
600 return ret;
601
602}
603
604static int direct_access_msr_slot(u32 msr)
605{
606 u32 i;
607
608 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
609 if (direct_access_msrs[i].index == msr)
610 return i;
611
612 return -ENOENT;
613}
614
615static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
616 int write)
617{
618 struct vcpu_svm *svm = to_svm(vcpu);
619 int slot = direct_access_msr_slot(msr);
620
621 if (slot == -ENOENT)
622 return;
623
624
625 if (read)
626 set_bit(slot, svm->shadow_msr_intercept.read);
627 else
628 clear_bit(slot, svm->shadow_msr_intercept.read);
629
630 if (write)
631 set_bit(slot, svm->shadow_msr_intercept.write);
632 else
633 clear_bit(slot, svm->shadow_msr_intercept.write);
634}
635
636static bool valid_msr_intercept(u32 index)
637{
638 return direct_access_msr_slot(index) != -ENOENT;
639}
640
641static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
642{
643 u8 bit_write;
644 unsigned long tmp;
645 u32 offset;
646 u32 *msrpm;
647
648 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
649 to_svm(vcpu)->msrpm;
650
651 offset = svm_msrpm_offset(msr);
652 bit_write = 2 * (msr & 0x0f) + 1;
653 tmp = msrpm[offset];
654
655 BUG_ON(offset == MSR_INVALID);
656
657 return !!test_bit(bit_write, &tmp);
658}
659
660static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
661 u32 msr, int read, int write)
662{
663 u8 bit_read, bit_write;
664 unsigned long tmp;
665 u32 offset;
666
667
668
669
670
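 /*
  * If this warning triggers, extend the direct_access_msrs list at the
  * beginning of the file.
  */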
671 WARN_ON(!valid_msr_intercept(msr));
672
673
674 if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
675 read = 0;
676
677 if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
678 write = 0;
679
680 offset = svm_msrpm_offset(msr);
681 bit_read = 2 * (msr & 0x0f);
682 bit_write = 2 * (msr & 0x0f) + 1;
683 tmp = msrpm[offset];
684
685 BUG_ON(offset == MSR_INVALID);
686
687 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
688 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
689
690 msrpm[offset] = tmp;
691
692 svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
693
694}
695
696void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
697 int read, int write)
698{
699 set_shadow_msr_intercept(vcpu, msr, read, write);
700 set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
701}
702
703u32 *svm_vcpu_alloc_msrpm(void)
704{
705 unsigned int order = get_order(MSRPM_SIZE);
706 struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, order);
707 u32 *msrpm;
708
709 if (!pages)
710 return NULL;
711
712 msrpm = page_address(pages);
713 memset(msrpm, 0xff, PAGE_SIZE * (1 << order));
714
715 return msrpm;
716}
717
718void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
719{
720 int i;
721
722 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
723 if (!direct_access_msrs[i].always)
724 continue;
725 set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
726 }
727}
728
729
730void svm_vcpu_free_msrpm(u32 *msrpm)
731{
732 __free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
733}
734
735static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
736{
737 struct vcpu_svm *svm = to_svm(vcpu);
738 u32 i;
739
740
741
742
743
744
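 /*
  * Re-apply the intercept state for every direct-access MSR.  The
  * desired state is tracked in shadow_msr_intercept, and
  * set_msr_interception_bitmap() re-evaluates the updated userspace MSR
  * filter before actually clearing an intercept.
  */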
745 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
746 u32 msr = direct_access_msrs[i].index;
747 u32 read = test_bit(i, svm->shadow_msr_intercept.read);
748 u32 write = test_bit(i, svm->shadow_msr_intercept.write);
749
750 set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
751 }
752}
753
754static void add_msr_offset(u32 offset)
755{
756 int i;
757
758 for (i = 0; i < MSRPM_OFFSETS; ++i) {
759
760
761 if (msrpm_offsets[i] == offset)
762 return;
763
764
765 if (msrpm_offsets[i] != MSR_INVALID)
766 continue;
767
768
769 msrpm_offsets[i] = offset;
770
771 return;
772 }
773
774
775
776
777
778 BUG();
779}
780
781static void init_msrpm_offsets(void)
782{
783 int i;
784
785 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
786
787 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
788 u32 offset;
789
790 offset = svm_msrpm_offset(direct_access_msrs[i].index);
791 BUG_ON(offset == MSR_INVALID);
792
793 add_msr_offset(offset);
794 }
795}
796
797static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
798{
799 struct vcpu_svm *svm = to_svm(vcpu);
800
801 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
802 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
803 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
804 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
805 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
806}
807
808static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
809{
810 struct vcpu_svm *svm = to_svm(vcpu);
811
812 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
813 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
814 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
815 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
816 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
817}
818
819void disable_nmi_singlestep(struct vcpu_svm *svm)
820{
821 svm->nmi_singlestep = false;
822
823 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
824
825 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
826 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
827 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
828 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
829 }
830}
831
832static void grow_ple_window(struct kvm_vcpu *vcpu)
833{
834 struct vcpu_svm *svm = to_svm(vcpu);
835 struct vmcb_control_area *control = &svm->vmcb->control;
836 int old = control->pause_filter_count;
837
838 control->pause_filter_count = __grow_ple_window(old,
839 pause_filter_count,
840 pause_filter_count_grow,
841 pause_filter_count_max);
842
843 if (control->pause_filter_count != old) {
844 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
845 trace_kvm_ple_window_update(vcpu->vcpu_id,
846 control->pause_filter_count, old);
847 }
848}
849
850static void shrink_ple_window(struct kvm_vcpu *vcpu)
851{
852 struct vcpu_svm *svm = to_svm(vcpu);
853 struct vmcb_control_area *control = &svm->vmcb->control;
854 int old = control->pause_filter_count;
855
856 control->pause_filter_count =
857 __shrink_ple_window(old,
858 pause_filter_count,
859 pause_filter_count_shrink,
860 pause_filter_count);
861 if (control->pause_filter_count != old) {
862 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
863 trace_kvm_ple_window_update(vcpu->vcpu_id,
864 control->pause_filter_count, old);
865 }
866}
867
868
869
870
871
872
873
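/*
 * The default MMIO SPTE mask could collide with the memory-encryption bit on
 * SME/SEV-capable parts.  If memory encryption is enabled, derive a mask from
 * the reported C-bit position instead so that MMIO SPTEs still generate the
 * expected reserved-bit page faults.
 */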
874static __init void svm_adjust_mmio_mask(void)
875{
876 unsigned int enc_bit, mask_bit;
877 u64 msr, mask;
878
879
880 if (cpuid_eax(0x80000000) < 0x8000001f)
881 return;
882
883
884 rdmsrl(MSR_AMD64_SYSCFG, msr);
885 if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
886 return;
887
888 enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
889 mask_bit = boot_cpu_data.x86_phys_bits;
890
891
892 if (enc_bit == mask_bit)
893 mask_bit++;
894
895
896
897
898
899
900
901
902
903
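 /*
  * If the chosen bit is below 52, bits [mask_bit, 51] are guaranteed to
  * be reserved in legal PTEs, so use them (plus the present bit) to make
  * MMIO SPTEs fault with PFERR.RSVD = 1.  At bit 52 or above no reserved
  * bits remain and the mask is simply cleared.
  */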
904 mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
905
906 kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
907}
908
909static void svm_hardware_teardown(void)
910{
911 int cpu;
912
913 sev_hardware_teardown();
914
915 for_each_possible_cpu(cpu)
916 svm_cpu_uninit(cpu);
917
918 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT),
919 get_order(IOPM_SIZE));
920 iopm_base = 0;
921}
922
923static __init void svm_set_cpu_caps(void)
924{
925 kvm_set_cpu_caps();
926
927 supported_xss = 0;
928
929
930 if (nested) {
931 kvm_cpu_cap_set(X86_FEATURE_SVM);
932
933 if (nrips)
934 kvm_cpu_cap_set(X86_FEATURE_NRIPS);
935
936 if (npt_enabled)
937 kvm_cpu_cap_set(X86_FEATURE_NPT);
938
939
940 kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
941 }
942
943
944 if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
945 boot_cpu_has(X86_FEATURE_AMD_SSBD))
946 kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
947
948
949 sev_set_cpu_caps();
950}
951
952static __init int svm_hardware_setup(void)
953{
954 int cpu;
955 struct page *iopm_pages;
956 void *iopm_va;
957 int r;
958 unsigned int order = get_order(IOPM_SIZE);
959
960
961
962
963
964 if (!boot_cpu_has(X86_FEATURE_NX)) {
965 pr_err_ratelimited("NX (Execute Disable) not supported\n");
966 return -EOPNOTSUPP;
967 }
968 kvm_enable_efer_bits(EFER_NX);
969
970 iopm_pages = alloc_pages(GFP_KERNEL, order);
971
972 if (!iopm_pages)
973 return -ENOMEM;
974
975 iopm_va = page_address(iopm_pages);
976 memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
977 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
978
979 init_msrpm_offsets();
980
981 supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
982
983 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
984 kvm_enable_efer_bits(EFER_FFXSR);
985
986 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
987 kvm_has_tsc_control = true;
988 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
989 kvm_tsc_scaling_ratio_frac_bits = 32;
990 }
991
992 tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
993
994
995 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
996 pause_filter_count = 0;
997 pause_filter_thresh = 0;
998 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
999 pause_filter_thresh = 0;
1000 }
1001
1002 if (nested) {
1003 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
1004 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
1005 }
1006
1007
1008
1009
1010
1011
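 /*
  * NPT requires the host MMU to use 64-bit or PAE paging, and of course
  * hardware support; otherwise fall back to shadow paging.
  */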
1012 if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
1013 npt_enabled = false;
1014
1015 if (!boot_cpu_has(X86_FEATURE_NPT))
1016 npt_enabled = false;
1017
1018 kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
1019 pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
1020
1021
1022 sev_hardware_setup();
1023
1024 svm_hv_hardware_setup();
1025
1026 svm_adjust_mmio_mask();
1027
1028 for_each_possible_cpu(cpu) {
1029 r = svm_cpu_init(cpu);
1030 if (r)
1031 goto err;
1032 }
1033
1034 if (nrips) {
1035 if (!boot_cpu_has(X86_FEATURE_NRIPS))
1036 nrips = false;
1037 }
1038
1039 enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
1040
1041 if (enable_apicv) {
1042 pr_info("AVIC enabled\n");
1043
1044 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1045 }
1046
1047 if (vls) {
1048 if (!npt_enabled ||
1049 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
1050 !IS_ENABLED(CONFIG_X86_64)) {
1051 vls = false;
1052 } else {
1053 pr_info("Virtual VMLOAD VMSAVE supported\n");
1054 }
1055 }
1056
1057 if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
1058 svm_gp_erratum_intercept = false;
1059
1060 if (vgif) {
1061 if (!boot_cpu_has(X86_FEATURE_VGIF))
1062 vgif = false;
1063 else
1064 pr_info("Virtual GIF supported\n");
1065 }
1066
1067 svm_set_cpu_caps();
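
 /*
  * With shadow paging KVM intercepts #PF and can emulate accesses above
  * a guest MAXPHYADDR that is smaller than the host's; with NPT the
  * guest walks its own page tables and such faults never reach KVM, so
  * only allow the smaller-MAXPHYADDR setup when NPT is disabled.
  */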
1082 allow_smaller_maxphyaddr = !npt_enabled;
1083
1084 return 0;
1085
1086err:
1087 svm_hardware_teardown();
1088 return r;
1089}
1090
1091static void init_seg(struct vmcb_seg *seg)
1092{
1093 seg->selector = 0;
1094 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1095 SVM_SELECTOR_WRITE_MASK;
1096 seg->limit = 0xffff;
1097 seg->base = 0;
1098}
1099
1100static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1101{
1102 seg->selector = 0;
1103 seg->attrib = SVM_SELECTOR_P_MASK | type;
1104 seg->limit = 0xffff;
1105 seg->base = 0;
1106}
1107
1108static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
1109{
1110 struct vcpu_svm *svm = to_svm(vcpu);
1111
1112 return svm->nested.ctl.tsc_offset;
1113}
1114
1115static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
1116{
1117 return kvm_default_tsc_scaling_ratio;
1118}
1119
1120static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1121{
1122 struct vcpu_svm *svm = to_svm(vcpu);
1123
1124 svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
1125 svm->vmcb->control.tsc_offset = offset;
1126 vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1127}
1128
1129static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
1130{
1131 wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
1132}
1133
1134
1135static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
1136 struct vcpu_svm *svm)
1137{
1138
1139
1140
1141
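 /*
  * Intercept INVPCID when shadow paging is active (so the shadow MMU
  * observes the invalidations) or when the guest should not have the
  * feature (to inject #UD).  Likewise RDTSCP is intercepted only when it
  * must be hidden from the guest.
  */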
1142 if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
1143 if (!npt_enabled ||
1144 !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
1145 svm_set_intercept(svm, INTERCEPT_INVPCID);
1146 else
1147 svm_clr_intercept(svm, INTERCEPT_INVPCID);
1148 }
1149
1150 if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
1151 if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
1152 svm_clr_intercept(svm, INTERCEPT_RDTSCP);
1153 else
1154 svm_set_intercept(svm, INTERCEPT_RDTSCP);
1155 }
1156}
1157
1158static void init_vmcb(struct kvm_vcpu *vcpu)
1159{
1160 struct vcpu_svm *svm = to_svm(vcpu);
1161 struct vmcb_control_area *control = &svm->vmcb->control;
1162 struct vmcb_save_area *save = &svm->vmcb->save;
1163
1164 vcpu->arch.hflags = 0;
1165
1166 svm_set_intercept(svm, INTERCEPT_CR0_READ);
1167 svm_set_intercept(svm, INTERCEPT_CR3_READ);
1168 svm_set_intercept(svm, INTERCEPT_CR4_READ);
1169 svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
1170 svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
1171 svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
1172 if (!kvm_vcpu_apicv_active(vcpu))
1173 svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
1174
1175 set_dr_intercepts(svm);
1176
1177 set_exception_intercept(svm, PF_VECTOR);
1178 set_exception_intercept(svm, UD_VECTOR);
1179 set_exception_intercept(svm, MC_VECTOR);
1180 set_exception_intercept(svm, AC_VECTOR);
1181 set_exception_intercept(svm, DB_VECTOR);
1182
1183
1184
1185
1186
1187
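 /*
  * The VMware backdoor relies on #GP interception: accesses to the
  * backdoor I/O ports can legitimately fault on the TSS I/O permission
  * bitmap, and KVM emulates them instead of injecting the #GP, as
  * VMware does.
  */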
1188 if (enable_vmware_backdoor)
1189 set_exception_intercept(svm, GP_VECTOR);
1190
1191 svm_set_intercept(svm, INTERCEPT_INTR);
1192 svm_set_intercept(svm, INTERCEPT_NMI);
1193
1194 if (intercept_smi)
1195 svm_set_intercept(svm, INTERCEPT_SMI);
1196
1197 svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1198 svm_set_intercept(svm, INTERCEPT_RDPMC);
1199 svm_set_intercept(svm, INTERCEPT_CPUID);
1200 svm_set_intercept(svm, INTERCEPT_INVD);
1201 svm_set_intercept(svm, INTERCEPT_INVLPG);
1202 svm_set_intercept(svm, INTERCEPT_INVLPGA);
1203 svm_set_intercept(svm, INTERCEPT_IOIO_PROT);
1204 svm_set_intercept(svm, INTERCEPT_MSR_PROT);
1205 svm_set_intercept(svm, INTERCEPT_TASK_SWITCH);
1206 svm_set_intercept(svm, INTERCEPT_SHUTDOWN);
1207 svm_set_intercept(svm, INTERCEPT_VMRUN);
1208 svm_set_intercept(svm, INTERCEPT_VMMCALL);
1209 svm_set_intercept(svm, INTERCEPT_VMLOAD);
1210 svm_set_intercept(svm, INTERCEPT_VMSAVE);
1211 svm_set_intercept(svm, INTERCEPT_STGI);
1212 svm_set_intercept(svm, INTERCEPT_CLGI);
1213 svm_set_intercept(svm, INTERCEPT_SKINIT);
1214 svm_set_intercept(svm, INTERCEPT_WBINVD);
1215 svm_set_intercept(svm, INTERCEPT_XSETBV);
1216 svm_set_intercept(svm, INTERCEPT_RDPRU);
1217 svm_set_intercept(svm, INTERCEPT_RSM);
1218
1219 if (!kvm_mwait_in_guest(vcpu->kvm)) {
1220 svm_set_intercept(svm, INTERCEPT_MONITOR);
1221 svm_set_intercept(svm, INTERCEPT_MWAIT);
1222 }
1223
1224 if (!kvm_hlt_in_guest(vcpu->kvm))
1225 svm_set_intercept(svm, INTERCEPT_HLT);
1226
1227 control->iopm_base_pa = __sme_set(iopm_base);
1228 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1229 control->int_ctl = V_INTR_MASKING_MASK;
1230
1231 init_seg(&save->es);
1232 init_seg(&save->ss);
1233 init_seg(&save->ds);
1234 init_seg(&save->fs);
1235 init_seg(&save->gs);
1236
1237 save->cs.selector = 0xf000;
1238 save->cs.base = 0xffff0000;
1239
1240 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1241 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1242 save->cs.limit = 0xffff;
1243
1244 save->gdtr.limit = 0xffff;
1245 save->idtr.limit = 0xffff;
1246
1247 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1248 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1249
1250 svm_set_cr4(vcpu, 0);
1251 svm_set_efer(vcpu, 0);
1252 save->dr6 = 0xffff0ff0;
1253 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
1254 save->rip = 0x0000fff0;
1255 vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
1256
1257
1258
1259
1260
1261 svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1262 kvm_mmu_reset_context(vcpu);
1263
1264 save->cr4 = X86_CR4_PAE;
1265
1266
1267 if (npt_enabled) {
1268
1269 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1270 svm_clr_intercept(svm, INTERCEPT_INVLPG);
1271 clr_exception_intercept(svm, PF_VECTOR);
1272 svm_clr_intercept(svm, INTERCEPT_CR3_READ);
1273 svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
1274 save->g_pat = vcpu->arch.pat;
1275 save->cr3 = 0;
1276 save->cr4 = 0;
1277 }
1278 svm->current_vmcb->asid_generation = 0;
1279 svm->asid = 0;
1280
1281 svm->nested.vmcb12_gpa = INVALID_GPA;
1282 svm->nested.last_vmcb12_gpa = INVALID_GPA;
1283 vcpu->arch.hflags = 0;
1284
1285 if (!kvm_pause_in_guest(vcpu->kvm)) {
1286 control->pause_filter_count = pause_filter_count;
1287 if (pause_filter_thresh)
1288 control->pause_filter_thresh = pause_filter_thresh;
1289 svm_set_intercept(svm, INTERCEPT_PAUSE);
1290 } else {
1291 svm_clr_intercept(svm, INTERCEPT_PAUSE);
1292 }
1293
1294 svm_recalc_instruction_intercepts(vcpu, svm);
1295
1296
1297
1298
1299
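 /*
  * With V_SPEC_CTRL the hardware context-switches SPEC_CTRL for the
  * guest, so the MSR can be passed through from the start instead of
  * waiting for the guest's first write.
  */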
1300 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
1301 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
1302
1303 if (kvm_vcpu_apicv_active(vcpu))
1304 avic_init_vmcb(svm);
1305
1306 if (vgif) {
1307 svm_clr_intercept(svm, INTERCEPT_STGI);
1308 svm_clr_intercept(svm, INTERCEPT_CLGI);
1309 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1310 }
1311
1312 if (sev_guest(vcpu->kvm)) {
1313 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1314 clr_exception_intercept(svm, UD_VECTOR);
1315
1316 if (sev_es_guest(vcpu->kvm)) {
1317
1318 sev_es_init_vmcb(svm);
1319 }
1320 }
1321
1322 svm_hv_init_vmcb(svm->vmcb);
1323
1324 vmcb_mark_all_dirty(svm->vmcb);
1325
1326 enable_gif(svm);
1327
1328}
1329
1330static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1331{
1332 struct vcpu_svm *svm = to_svm(vcpu);
1333 u32 dummy;
1334 u32 eax = 1;
1335
1336 svm->spec_ctrl = 0;
1337 svm->virt_spec_ctrl = 0;
1338
1339 if (!init_event) {
1340 vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1341 MSR_IA32_APICBASE_ENABLE;
1342 if (kvm_vcpu_is_reset_bsp(vcpu))
1343 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
1344 }
1345 init_vmcb(vcpu);
1346
1347 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
1348 kvm_rdx_write(vcpu, eax);
1349
1350 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
1351 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
1352}
1353
1354void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
1355{
1356 svm->current_vmcb = target_vmcb;
1357 svm->vmcb = target_vmcb->ptr;
1358}
1359
1360static int svm_create_vcpu(struct kvm_vcpu *vcpu)
1361{
1362 struct vcpu_svm *svm;
1363 struct page *vmcb01_page;
1364 struct page *vmsa_page = NULL;
1365 int err;
1366
1367 BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
1368 svm = to_svm(vcpu);
1369
1370 err = -ENOMEM;
1371 vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1372 if (!vmcb01_page)
1373 goto out;
1374
1375 if (sev_es_guest(vcpu->kvm)) {
1376
1377
1378
1379
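 /*
  * SEV-ES guests need a separate VMSA page to hold the encrypted
  * register state of the vCPU.
  */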
1380 vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1381 if (!vmsa_page)
1382 goto error_free_vmcb_page;
1383
1384
1385
1386
1387
1388
1389
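 /*
  * SEV-ES guests keep their FPU state encrypted in the VMSA, so the FPU
  * state allocated by common KVM code is never used; free it.
  */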
1390 kvm_free_guest_fpu(vcpu);
1391 }
1392
1393 err = avic_init_vcpu(svm);
1394 if (err)
1395 goto error_free_vmsa_page;
1396
1397
1398
1399
1400 if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
1401 svm->avic_is_running = true;
1402
1403 svm->msrpm = svm_vcpu_alloc_msrpm();
1404 if (!svm->msrpm) {
1405 err = -ENOMEM;
1406 goto error_free_vmsa_page;
1407 }
1408
1409 svm->vmcb01.ptr = page_address(vmcb01_page);
1410 svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
1411
1412 if (vmsa_page)
1413 svm->vmsa = page_address(vmsa_page);
1414
1415 svm->guest_state_loaded = false;
1416
1417 svm_switch_vmcb(svm, &svm->vmcb01);
1418 init_vmcb(vcpu);
1419
1420 svm_vcpu_init_msrpm(vcpu, svm->msrpm);
1421
1422 svm_init_osvw(vcpu);
1423 vcpu->arch.microcode_version = 0x01000065;
1424
1425 if (sev_es_guest(vcpu->kvm))
1426
1427 sev_es_create_vcpu(svm);
1428
1429 return 0;
1430
1431error_free_vmsa_page:
1432 if (vmsa_page)
1433 __free_page(vmsa_page);
1434error_free_vmcb_page:
1435 __free_page(vmcb01_page);
1436out:
1437 return err;
1438}
1439
1440static void svm_clear_current_vmcb(struct vmcb *vmcb)
1441{
1442 int i;
1443
1444 for_each_online_cpu(i)
1445 cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
1446}
1447
1448static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1449{
1450 struct vcpu_svm *svm = to_svm(vcpu);
1451
1452
1453
1454
1455
1456
1457 svm_clear_current_vmcb(svm->vmcb);
1458
1459 svm_free_nested(svm);
1460
1461 sev_free_vcpu(vcpu);
1462
1463 __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
1464 __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
1465}
1466
1467static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
1468{
1469 struct vcpu_svm *svm = to_svm(vcpu);
1470 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
1471
1472 if (sev_es_guest(vcpu->kvm))
1473 sev_es_unmap_ghcb(svm);
1474
1475 if (svm->guest_state_loaded)
1476 return;
1477
1478
1479
1480
1481
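 /*
  * Save the additional host state that #VMEXIT itself does not restore:
  * SEV-ES guests have a dedicated path, everything else uses VMSAVE
  * into the per-CPU save area.
  */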
1482 if (sev_es_guest(vcpu->kvm)) {
1483 sev_es_prepare_guest_switch(svm, vcpu->cpu);
1484 } else {
1485 vmsave(__sme_page_pa(sd->save_area));
1486 }
1487
1488 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1489 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
1490 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
1491 __this_cpu_write(current_tsc_ratio, tsc_ratio);
1492 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
1493 }
1494 }
1495
1496 if (likely(tsc_aux_uret_slot >= 0))
1497 kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
1498
1499 svm->guest_state_loaded = true;
1500}
1501
1502static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
1503{
1504 to_svm(vcpu)->guest_state_loaded = false;
1505}
1506
1507static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1508{
1509 struct vcpu_svm *svm = to_svm(vcpu);
1510 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1511
1512 if (sd->current_vmcb != svm->vmcb) {
1513 sd->current_vmcb = svm->vmcb;
1514 indirect_branch_prediction_barrier();
1515 }
1516 avic_vcpu_load(vcpu, cpu);
1517}
1518
1519static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1520{
1521 avic_vcpu_put(vcpu);
1522 svm_prepare_host_switch(vcpu);
1523
1524 ++vcpu->stat.host_state_reload;
1525}
1526
1527static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1528{
1529 struct vcpu_svm *svm = to_svm(vcpu);
1530 unsigned long rflags = svm->vmcb->save.rflags;
1531
1532 if (svm->nmi_singlestep) {
1533
1534 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1535 rflags &= ~X86_EFLAGS_TF;
1536 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1537 rflags &= ~X86_EFLAGS_RF;
1538 }
1539 return rflags;
1540}
1541
1542static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1543{
1544 if (to_svm(vcpu)->nmi_singlestep)
1545 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
1546
1547
1548
1549
1550
1551
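 /*
  * A change of EFLAGS.VM is always accompanied by a reload of SS (via a
  * task switch or an inter-privilege IRET), so there is no need to
  * update the cached CPL here.
  */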
1552 to_svm(vcpu)->vmcb->save.rflags = rflags;
1553}
1554
1555static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1556{
1557 switch (reg) {
1558 case VCPU_EXREG_PDPTR:
1559 BUG_ON(!npt_enabled);
1560 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1561 break;
1562 default:
1563 WARN_ON_ONCE(1);
1564 }
1565}
1566
1567static void svm_set_vintr(struct vcpu_svm *svm)
1568{
1569 struct vmcb_control_area *control;
1570
1571
1572
1573
1574 WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
1575
1576 svm_set_intercept(svm, INTERCEPT_VINTR);
1577
1578
1579
1580
1581
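 /*
  * This is just a dummy VINTR to cause a vmexit as soon as the guest
  * can accept interrupts; actual interrupt injection happens through
  * the VMCB's EVENTINJ field.
  */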
1582 control = &svm->vmcb->control;
1583 control->int_vector = 0x0;
1584 control->int_ctl &= ~V_INTR_PRIO_MASK;
 control->int_ctl |= V_IRQ_MASK | (0xf << V_INTR_PRIO_SHIFT);
1587 vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
1588}
1589
1590static void svm_clear_vintr(struct vcpu_svm *svm)
1591{
1592 svm_clr_intercept(svm, INTERCEPT_VINTR);
1593
1594
1595 svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
1596 if (is_guest_mode(&svm->vcpu)) {
1597 svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
1598
1599 WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
1600 (svm->nested.ctl.int_ctl & V_TPR_MASK));
1601
1602 svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
1603 V_IRQ_INJECTION_BITS_MASK;
1604 }
1605
1606 vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
1607}
1608
1609static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1610{
1611 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1612 struct vmcb_save_area *save01 = &to_svm(vcpu)->vmcb01.ptr->save;
1613
1614 switch (seg) {
1615 case VCPU_SREG_CS: return &save->cs;
1616 case VCPU_SREG_DS: return &save->ds;
1617 case VCPU_SREG_ES: return &save->es;
1618 case VCPU_SREG_FS: return &save01->fs;
1619 case VCPU_SREG_GS: return &save01->gs;
1620 case VCPU_SREG_SS: return &save->ss;
1621 case VCPU_SREG_TR: return &save01->tr;
1622 case VCPU_SREG_LDTR: return &save01->ldtr;
1623 }
1624 BUG();
1625 return NULL;
1626}
1627
1628static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1629{
1630 struct vmcb_seg *s = svm_seg(vcpu, seg);
1631
1632 return s->base;
1633}
1634
1635static void svm_get_segment(struct kvm_vcpu *vcpu,
1636 struct kvm_segment *var, int seg)
1637{
1638 struct vmcb_seg *s = svm_seg(vcpu, seg);
1639
1640 var->base = s->base;
1641 var->limit = s->limit;
1642 var->selector = s->selector;
1643 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1644 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1645 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1646 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1647 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1648 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1649 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
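
 /*
  * Some CPUs and hypervisors do not preserve the granularity bit in the
  * VMCB, so synthesize a legal value from the limit: anything above
  * 0xfffff must be page-granular.
  */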
1659 var->g = s->limit > 0xfffff;
1660
1661
1662
1663
1664
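 /*
  * The VMCB has no "unusable" flag, so approximate it as !present for
  * the benefit of common code and cross-vendor migration.
  */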
1665 var->unusable = !var->present;
1666
1667 switch (seg) {
1668 case VCPU_SREG_TR:
1669
1670
1671
1672
1673 var->type |= 0x2;
1674 break;
1675 case VCPU_SREG_DS:
1676 case VCPU_SREG_ES:
1677 case VCPU_SREG_FS:
1678 case VCPU_SREG_GS:
1679
1680
1681
1682
1683
1684
1685
1686 if (!var->unusable)
1687 var->type |= 0x1;
1688 break;
1689 case VCPU_SREG_SS:
1690
1691
1692
1693
1694
1695
1696 if (var->unusable)
1697 var->db = 0;
1698
1699 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
1700 break;
1701 }
1702}
1703
1704static int svm_get_cpl(struct kvm_vcpu *vcpu)
1705{
1706 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1707
1708 return save->cpl;
1709}
1710
1711static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1712{
1713 struct vcpu_svm *svm = to_svm(vcpu);
1714
1715 dt->size = svm->vmcb->save.idtr.limit;
1716 dt->address = svm->vmcb->save.idtr.base;
1717}
1718
1719static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1720{
1721 struct vcpu_svm *svm = to_svm(vcpu);
1722
1723 svm->vmcb->save.idtr.limit = dt->size;
1724 svm->vmcb->save.idtr.base = dt->address ;
1725 vmcb_mark_dirty(svm->vmcb, VMCB_DT);
1726}
1727
1728static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1729{
1730 struct vcpu_svm *svm = to_svm(vcpu);
1731
1732 dt->size = svm->vmcb->save.gdtr.limit;
1733 dt->address = svm->vmcb->save.gdtr.base;
1734}
1735
1736static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1737{
1738 struct vcpu_svm *svm = to_svm(vcpu);
1739
1740 svm->vmcb->save.gdtr.limit = dt->size;
1741 svm->vmcb->save.gdtr.base = dt->address ;
1742 vmcb_mark_dirty(svm->vmcb, VMCB_DT);
1743}
1744
1745void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1746{
1747 struct vcpu_svm *svm = to_svm(vcpu);
1748 u64 hcr0 = cr0;
1749
1750#ifdef CONFIG_X86_64
1751 if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
1752 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1753 vcpu->arch.efer |= EFER_LMA;
1754 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1755 }
1756
1757 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1758 vcpu->arch.efer &= ~EFER_LMA;
1759 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1760 }
1761 }
1762#endif
1763 vcpu->arch.cr0 = cr0;
1764
1765 if (!npt_enabled)
1766 hcr0 |= X86_CR0_PG | X86_CR0_WP;
1767
1768
1769
1770
1771
1772
1773 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1774 hcr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1775
1776 svm->vmcb->save.cr0 = hcr0;
1777 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
1778
1779
1780
1781
1782
1783 if (sev_es_guest(vcpu->kvm))
1784 return;
1785
1786 if (hcr0 == cr0) {
1787
1788 svm_clr_intercept(svm, INTERCEPT_CR0_READ);
1789 svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
1790 } else {
1791 svm_set_intercept(svm, INTERCEPT_CR0_READ);
1792 svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
1793 }
1794}
1795
1796static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1797{
1798 return true;
1799}
1800
1801void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1802{
1803 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
1804 unsigned long old_cr4 = vcpu->arch.cr4;
1805
1806 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1807 svm_flush_tlb(vcpu);
1808
1809 vcpu->arch.cr4 = cr4;
1810 if (!npt_enabled)
1811 cr4 |= X86_CR4_PAE;
1812 cr4 |= host_cr4_mce;
1813 to_svm(vcpu)->vmcb->save.cr4 = cr4;
1814 vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1815
1816 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
1817 kvm_update_cpuid_runtime(vcpu);
1818}
1819
1820static void svm_set_segment(struct kvm_vcpu *vcpu,
1821 struct kvm_segment *var, int seg)
1822{
1823 struct vcpu_svm *svm = to_svm(vcpu);
1824 struct vmcb_seg *s = svm_seg(vcpu, seg);
1825
1826 s->base = var->base;
1827 s->limit = var->limit;
1828 s->selector = var->selector;
1829 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1830 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1831 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1832 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
1833 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1834 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1835 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1836 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1837
1838
1839
1840
1841
1842
1843
1844 if (seg == VCPU_SREG_SS)
1845
1846 svm->vmcb->save.cpl = (var->dpl & 3);
1847
1848 vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
1849}
1850
1851static void svm_update_exception_bitmap(struct kvm_vcpu *vcpu)
1852{
1853 struct vcpu_svm *svm = to_svm(vcpu);
1854
1855 clr_exception_intercept(svm, BP_VECTOR);
1856
1857 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1858 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1859 set_exception_intercept(svm, BP_VECTOR);
1860 }
1861}
1862
1863static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1864{
1865 if (sd->next_asid > sd->max_asid) {
1866 ++sd->asid_generation;
1867 sd->next_asid = sd->min_asid;
1868 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1869 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1870 }
1871
1872 svm->current_vmcb->asid_generation = sd->asid_generation;
1873 svm->asid = sd->next_asid++;
1874}
1875
1876static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
1877{
1878 struct vmcb *vmcb = svm->vmcb;
1879
1880 if (svm->vcpu.arch.guest_state_protected)
1881 return;
1882
1883 if (unlikely(value != vmcb->save.dr6)) {
1884 vmcb->save.dr6 = value;
1885 vmcb_mark_dirty(vmcb, VMCB_DR);
1886 }
1887}
1888
1889static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
1890{
1891 struct vcpu_svm *svm = to_svm(vcpu);
1892
1893 if (vcpu->arch.guest_state_protected)
1894 return;
1895
1896 get_debugreg(vcpu->arch.db[0], 0);
1897 get_debugreg(vcpu->arch.db[1], 1);
1898 get_debugreg(vcpu->arch.db[2], 2);
1899 get_debugreg(vcpu->arch.db[3], 3);
1900
1901
1902
1903
1904 vcpu->arch.dr6 = svm->vmcb->save.dr6;
1905 vcpu->arch.dr7 = svm->vmcb->save.dr7;
1906 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
1907 set_dr_intercepts(svm);
1908}
1909
1910static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1911{
1912 struct vcpu_svm *svm = to_svm(vcpu);
1913
1914 if (vcpu->arch.guest_state_protected)
1915 return;
1916
1917 svm->vmcb->save.dr7 = value;
1918 vmcb_mark_dirty(svm->vmcb, VMCB_DR);
1919}
1920
1921static int pf_interception(struct kvm_vcpu *vcpu)
1922{
1923 struct vcpu_svm *svm = to_svm(vcpu);
1924
1925 u64 fault_address = svm->vmcb->control.exit_info_2;
1926 u64 error_code = svm->vmcb->control.exit_info_1;
1927
1928 return kvm_handle_page_fault(vcpu, error_code, fault_address,
1929 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1930 svm->vmcb->control.insn_bytes : NULL,
1931 svm->vmcb->control.insn_len);
1932}
1933
1934static int npf_interception(struct kvm_vcpu *vcpu)
1935{
1936 struct vcpu_svm *svm = to_svm(vcpu);
1937
1938 u64 fault_address = svm->vmcb->control.exit_info_2;
1939 u64 error_code = svm->vmcb->control.exit_info_1;
1940
1941 trace_kvm_page_fault(fault_address, error_code);
1942 return kvm_mmu_page_fault(vcpu, fault_address, error_code,
1943 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1944 svm->vmcb->control.insn_bytes : NULL,
1945 svm->vmcb->control.insn_len);
1946}
1947
1948static int db_interception(struct kvm_vcpu *vcpu)
1949{
1950 struct kvm_run *kvm_run = vcpu->run;
1951 struct vcpu_svm *svm = to_svm(vcpu);
1952
1953 if (!(vcpu->guest_debug &
1954 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1955 !svm->nmi_singlestep) {
1956 u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW;
1957 kvm_queue_exception_p(vcpu, DB_VECTOR, payload);
1958 return 1;
1959 }
1960
1961 if (svm->nmi_singlestep) {
1962 disable_nmi_singlestep(svm);
1963
1964 kvm_make_request(KVM_REQ_EVENT, vcpu);
1965 }
1966
1967 if (vcpu->guest_debug &
1968 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1969 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1970 kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
1971 kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
1972 kvm_run->debug.arch.pc =
1973 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1974 kvm_run->debug.arch.exception = DB_VECTOR;
1975 return 0;
1976 }
1977
1978 return 1;
1979}
1980
1981static int bp_interception(struct kvm_vcpu *vcpu)
1982{
1983 struct vcpu_svm *svm = to_svm(vcpu);
1984 struct kvm_run *kvm_run = vcpu->run;
1985
1986 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1987 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1988 kvm_run->debug.arch.exception = BP_VECTOR;
1989 return 0;
1990}
1991
1992static int ud_interception(struct kvm_vcpu *vcpu)
1993{
1994 return handle_ud(vcpu);
1995}
1996
1997static int ac_interception(struct kvm_vcpu *vcpu)
1998{
1999 kvm_queue_exception_e(vcpu, AC_VECTOR, 0);
2000 return 1;
2001}
2002
2003static bool is_erratum_383(void)
2004{
2005 int err, i;
2006 u64 value;
2007
2008 if (!erratum_383_found)
2009 return false;
2010
2011 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2012 if (err)
2013 return false;
2014
2015
2016 value &= ~(1ULL << 62);
2017
2018 if (value != 0xb600000000010015ULL)
2019 return false;
2020
2021
2022 for (i = 0; i < 6; ++i)
2023 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2024
2025 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2026 if (!err) {
2027 u32 low, high;
2028
2029 value &= ~(1ULL << 2);
2030 low = lower_32_bits(value);
2031 high = upper_32_bits(value);
2032
2033 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2034 }
2035
2036
2037 __flush_tlb_all();
2038
2039 return true;
2040}
2041
2042static void svm_handle_mce(struct kvm_vcpu *vcpu)
2043{
2044 if (is_erratum_383()) {
2045
2046
2047
2048
2049 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2050
2051 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2052
2053 return;
2054 }
2055
2056
2057
2058
2059
2060 kvm_machine_check();
2061}
2062
2063static int mc_interception(struct kvm_vcpu *vcpu)
2064{
2065 return 1;
2066}
2067
2068static int shutdown_interception(struct kvm_vcpu *vcpu)
2069{
2070 struct kvm_run *kvm_run = vcpu->run;
2071 struct vcpu_svm *svm = to_svm(vcpu);
2072
2073
2074
2075
2076
2077 if (sev_es_guest(vcpu->kvm))
2078 return -EINVAL;
2079
2080
2081
2082
2083
2084 clear_page(svm->vmcb);
2085 init_vmcb(vcpu);
2086
2087 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2088 return 0;
2089}
2090
2091static int io_interception(struct kvm_vcpu *vcpu)
2092{
2093 struct vcpu_svm *svm = to_svm(vcpu);
2094 u32 io_info = svm->vmcb->control.exit_info_1;
2095 int size, in, string;
2096 unsigned port;
2097
2098 ++vcpu->stat.io_exits;
2099 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2100 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2101 port = io_info >> 16;
2102 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
2103
2104 if (string) {
2105 if (sev_es_guest(vcpu->kvm))
2106 return sev_es_string_io(svm, size, port, in);
2107 else
2108 return kvm_emulate_instruction(vcpu, 0);
2109 }
2110
2111 svm->next_rip = svm->vmcb->control.exit_info_2;
2112
2113 return kvm_fast_pio(vcpu, size, port, in);
2114}
2115
2116static int nmi_interception(struct kvm_vcpu *vcpu)
2117{
2118 return 1;
2119}
2120
2121static int smi_interception(struct kvm_vcpu *vcpu)
2122{
2123 return 1;
2124}
2125
2126static int intr_interception(struct kvm_vcpu *vcpu)
2127{
2128 ++vcpu->stat.irq_exits;
2129 return 1;
2130}
2131
2132static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
2133{
2134 struct vcpu_svm *svm = to_svm(vcpu);
2135 struct vmcb *vmcb12;
2136 struct kvm_host_map map;
2137 int ret;
2138
2139 if (nested_svm_check_permissions(vcpu))
2140 return 1;
2141
2142 ret = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
2143 if (ret) {
2144 if (ret == -EINVAL)
2145 kvm_inject_gp(vcpu, 0);
2146 return 1;
2147 }
2148
2149 vmcb12 = map.hva;
2150
2151 ret = kvm_skip_emulated_instruction(vcpu);
2152
2153 if (vmload) {
2154 svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
2155 svm->sysenter_eip_hi = 0;
2156 svm->sysenter_esp_hi = 0;
2157 } else {
2158 svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
2159 }
2160
2161 kvm_vcpu_unmap(vcpu, &map, true);
2162
2163 return ret;
2164}
2165
2166static int vmload_interception(struct kvm_vcpu *vcpu)
2167{
2168 return vmload_vmsave_interception(vcpu, true);
2169}
2170
2171static int vmsave_interception(struct kvm_vcpu *vcpu)
2172{
2173 return vmload_vmsave_interception(vcpu, false);
2174}
2175
2176static int vmrun_interception(struct kvm_vcpu *vcpu)
2177{
2178 if (nested_svm_check_permissions(vcpu))
2179 return 1;
2180
2181 return nested_svm_vmrun(vcpu);
2182}
2183
2184enum {
2185 NONE_SVM_INSTR,
2186 SVM_INSTR_VMRUN,
2187 SVM_INSTR_VMLOAD,
2188 SVM_INSTR_VMSAVE,
2189};
2190
2191
2192static int svm_instr_opcode(struct kvm_vcpu *vcpu)
2193{
2194 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
2195
2196 if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
2197 return NONE_SVM_INSTR;
2198
2199 switch (ctxt->modrm) {
2200 case 0xd8:
2201 return SVM_INSTR_VMRUN;
2202 case 0xda:
2203 return SVM_INSTR_VMLOAD;
2204 case 0xdb:
2205 return SVM_INSTR_VMSAVE;
2206 default:
2207 break;
2208 }
2209
2210 return NONE_SVM_INSTR;
2211}
2212
2213static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
2214{
2215 const int guest_mode_exit_codes[] = {
2216 [SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
2217 [SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
2218 [SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
2219 };
2220 int (*const svm_instr_handlers[])(struct kvm_vcpu *vcpu) = {
2221 [SVM_INSTR_VMRUN] = vmrun_interception,
2222 [SVM_INSTR_VMLOAD] = vmload_interception,
2223 [SVM_INSTR_VMSAVE] = vmsave_interception,
2224 };
2225 struct vcpu_svm *svm = to_svm(vcpu);
2226 int ret;
2227
2228 if (is_guest_mode(vcpu)) {
2229
2230 ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
2231 if (ret)
2232 return ret;
2233 return 1;
2234 }
2235 return svm_instr_handlers[opcode](vcpu);
2236}
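
/*
 * #GP interception serves two purposes: working around the erratum where
 * VMRUN/VMLOAD/VMSAVE can raise #GP (rather than the expected intercept) when
 * their memory operand falls in certain reserved regions (see
 * svm_gp_erratum_intercept), and emulating the VMware backdoor when it is
 * enabled.  Anything else is reinjected into the guest.
 */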
2246static int gp_interception(struct kvm_vcpu *vcpu)
2247{
2248 struct vcpu_svm *svm = to_svm(vcpu);
2249 u32 error_code = svm->vmcb->control.exit_info_1;
2250 int opcode;
2251
2252
2253 if (error_code)
2254 goto reinject;
2255
2256
2257 if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
2258 goto reinject;
2259
2260 opcode = svm_instr_opcode(vcpu);
2261
2262 if (opcode == NONE_SVM_INSTR) {
2263 if (!enable_vmware_backdoor)
2264 goto reinject;
2265
2266
2267
2268
2269
2270 if (!is_guest_mode(vcpu))
2271 return kvm_emulate_instruction(vcpu,
2272 EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
2273 } else
2274 return emulate_svm_instr(vcpu, opcode);
2275
2276reinject:
2277 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2278 return 1;
2279}
2280
2281void svm_set_gif(struct vcpu_svm *svm, bool value)
2282{
2283 if (value) {
2284
2285
2286
2287
2288
2289
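 /*
  * With VGIF, the STGI intercept is only armed to detect the guest
  * re-enabling GIF; once GIF is being set, drop that intercept and any
  * pending VINTR window request, then re-evaluate injectable events.
  */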
2290 if (vgif_enabled(svm))
2291 svm_clr_intercept(svm, INTERCEPT_STGI);
2292 if (svm_is_intercept(svm, INTERCEPT_VINTR))
2293 svm_clear_vintr(svm);
2294
2295 enable_gif(svm);
2296 if (svm->vcpu.arch.smi_pending ||
2297 svm->vcpu.arch.nmi_pending ||
2298 kvm_cpu_has_injectable_intr(&svm->vcpu))
2299 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2300 } else {
2301 disable_gif(svm);
2302
2303
2304
2305
2306
2307
2308 if (!vgif_enabled(svm))
2309 svm_clear_vintr(svm);
2310 }
2311}
2312
2313static int stgi_interception(struct kvm_vcpu *vcpu)
2314{
2315 int ret;
2316
2317 if (nested_svm_check_permissions(vcpu))
2318 return 1;
2319
2320 ret = kvm_skip_emulated_instruction(vcpu);
2321 svm_set_gif(to_svm(vcpu), true);
2322 return ret;
2323}
2324
2325static int clgi_interception(struct kvm_vcpu *vcpu)
2326{
2327 int ret;
2328
2329 if (nested_svm_check_permissions(vcpu))
2330 return 1;
2331
2332 ret = kvm_skip_emulated_instruction(vcpu);
2333 svm_set_gif(to_svm(vcpu), false);
2334 return ret;
2335}
2336
2337static int invlpga_interception(struct kvm_vcpu *vcpu)
2338{
2339 gva_t gva = kvm_rax_read(vcpu);
2340 u32 asid = kvm_rcx_read(vcpu);
2341
2342
2343 if (!is_long_mode(vcpu))
2344 gva = (u32)gva;
2345
2346 trace_kvm_invlpga(to_svm(vcpu)->vmcb->save.rip, asid, gva);
2347
2348
2349 kvm_mmu_invlpg(vcpu, gva);
2350
2351 return kvm_skip_emulated_instruction(vcpu);
2352}
2353
2354static int skinit_interception(struct kvm_vcpu *vcpu)
2355{
2356 trace_kvm_skinit(to_svm(vcpu)->vmcb->save.rip, kvm_rax_read(vcpu));
2357
2358 kvm_queue_exception(vcpu, UD_VECTOR);
2359 return 1;
2360}
2361
2362static int task_switch_interception(struct kvm_vcpu *vcpu)
2363{
2364 struct vcpu_svm *svm = to_svm(vcpu);
2365 u16 tss_selector;
2366 int reason;
2367 int int_type = svm->vmcb->control.exit_int_info &
2368 SVM_EXITINTINFO_TYPE_MASK;
2369 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2370 uint32_t type =
2371 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2372 uint32_t idt_v =
2373 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2374 bool has_error_code = false;
2375 u32 error_code = 0;
2376
2377 tss_selector = (u16)svm->vmcb->control.exit_info_1;
2378
2379 if (svm->vmcb->control.exit_info_2 &
2380 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2381 reason = TASK_SWITCH_IRET;
2382 else if (svm->vmcb->control.exit_info_2 &
2383 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2384 reason = TASK_SWITCH_JMP;
2385 else if (idt_v)
2386 reason = TASK_SWITCH_GATE;
2387 else
2388 reason = TASK_SWITCH_CALL;
2389
2390 if (reason == TASK_SWITCH_GATE) {
2391 switch (type) {
2392 case SVM_EXITINTINFO_TYPE_NMI:
2393 vcpu->arch.nmi_injected = false;
2394 break;
2395 case SVM_EXITINTINFO_TYPE_EXEPT:
2396 if (svm->vmcb->control.exit_info_2 &
2397 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2398 has_error_code = true;
2399 error_code =
2400 (u32)svm->vmcb->control.exit_info_2;
2401 }
2402 kvm_clear_exception_queue(vcpu);
2403 break;
2404 case SVM_EXITINTINFO_TYPE_INTR:
2405 kvm_clear_interrupt_queue(vcpu);
2406 break;
2407 default:
2408 break;
2409 }
2410 }
2411
2412 if (reason != TASK_SWITCH_GATE ||
2413 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2414 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2415 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
2416 if (!skip_emulated_instruction(vcpu))
2417 return 0;
2418 }
2419
2420 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2421 int_vec = -1;
2422
2423 return kvm_task_switch(vcpu, tss_selector, int_vec, reason,
2424 has_error_code, error_code);
2425}
2426
2427static int iret_interception(struct kvm_vcpu *vcpu)
2428{
2429 struct vcpu_svm *svm = to_svm(vcpu);
2430
2431 ++vcpu->stat.nmi_window_exits;
2432 vcpu->arch.hflags |= HF_IRET_MASK;
2433 if (!sev_es_guest(vcpu->kvm)) {
2434 svm_clr_intercept(svm, INTERCEPT_IRET);
2435 svm->nmi_iret_rip = kvm_rip_read(vcpu);
2436 }
2437 kvm_make_request(KVM_REQ_EVENT, vcpu);
2438 return 1;
2439}
2440
2441static int invlpg_interception(struct kvm_vcpu *vcpu)
2442{
2443 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2444 return kvm_emulate_instruction(vcpu, 0);
2445
2446 kvm_mmu_invlpg(vcpu, to_svm(vcpu)->vmcb->control.exit_info_1);
2447 return kvm_skip_emulated_instruction(vcpu);
2448}
2449
2450static int emulate_on_interception(struct kvm_vcpu *vcpu)
2451{
2452 return kvm_emulate_instruction(vcpu, 0);
2453}
2454
2455static int rsm_interception(struct kvm_vcpu *vcpu)
2456{
2457 return kvm_emulate_instruction_from_buffer(vcpu, rsm_ins_bytes, 2);
2458}
2459
2460static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
2461 unsigned long val)
2462{
2463 struct vcpu_svm *svm = to_svm(vcpu);
2464 unsigned long cr0 = vcpu->arch.cr0;
2465 bool ret = false;
2466
2467 if (!is_guest_mode(vcpu) ||
2468 (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
2469 return false;
2470
2471 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2472 val &= ~SVM_CR0_SELECTIVE_MASK;
2473
2474 if (cr0 ^ val) {
2475 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2476 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2477 }
2478
2479 return ret;
2480}
2481
2482#define CR_VALID (1ULL << 63)
2483
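/*
 * CR access intercepts rely on decode assists: bit 63 of exit_info_1
 * (CR_VALID) indicates valid decode data and the low bits name the GPR
 * involved.  Write intercepts use exit codes offset by 16 from the read
 * intercepts, hence the "cr >= 16" split below; anything else falls back
 * to the instruction emulator.
 */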
2484static int cr_interception(struct kvm_vcpu *vcpu)
2485{
2486 struct vcpu_svm *svm = to_svm(vcpu);
2487 int reg, cr;
2488 unsigned long val;
2489 int err;
2490
2491 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2492 return emulate_on_interception(vcpu);
2493
2494 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2495 return emulate_on_interception(vcpu);
2496
2497 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2498 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
2499 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
2500 else
2501 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2502
2503 err = 0;
2504 if (cr >= 16) {
2505 cr -= 16;
2506 val = kvm_register_read(vcpu, reg);
2507 trace_kvm_cr_write(cr, val);
2508 switch (cr) {
2509 case 0:
2510 if (!check_selective_cr0_intercepted(vcpu, val))
2511 err = kvm_set_cr0(vcpu, val);
2512 else
2513 return 1;
2514
2515 break;
2516 case 3:
2517 err = kvm_set_cr3(vcpu, val);
2518 break;
2519 case 4:
2520 err = kvm_set_cr4(vcpu, val);
2521 break;
2522 case 8:
2523 err = kvm_set_cr8(vcpu, val);
2524 break;
2525 default:
2526 WARN(1, "unhandled write to CR%d", cr);
2527 kvm_queue_exception(vcpu, UD_VECTOR);
2528 return 1;
2529 }
2530 } else {
2531 switch (cr) {
2532 case 0:
2533 val = kvm_read_cr0(vcpu);
2534 break;
2535 case 2:
2536 val = vcpu->arch.cr2;
2537 break;
2538 case 3:
2539 val = kvm_read_cr3(vcpu);
2540 break;
2541 case 4:
2542 val = kvm_read_cr4(vcpu);
2543 break;
2544 case 8:
2545 val = kvm_get_cr8(vcpu);
2546 break;
2547 default:
2548 WARN(1, "unhandled read from CR%d", cr);
2549 kvm_queue_exception(vcpu, UD_VECTOR);
2550 return 1;
2551 }
2552 kvm_register_write(vcpu, reg, val);
2553 trace_kvm_cr_read(cr, val);
2554 }
2555 return kvm_complete_insn_gp(vcpu, err);
2556}
2557
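/*
 * CR write traps (used by SEV-ES guests, where KVM cannot intercept and
 * decode the write itself) deliver the new register value in exit_info_1;
 * KVM only has to update its own view of the register.
 */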
2558static int cr_trap(struct kvm_vcpu *vcpu)
2559{
2560 struct vcpu_svm *svm = to_svm(vcpu);
2561 unsigned long old_value, new_value;
2562 unsigned int cr;
2563 int ret = 0;
2564
2565 new_value = (unsigned long)svm->vmcb->control.exit_info_1;
2566
2567 cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
2568 switch (cr) {
2569 case 0:
2570 old_value = kvm_read_cr0(vcpu);
2571 svm_set_cr0(vcpu, new_value);
2572
2573 kvm_post_set_cr0(vcpu, old_value, new_value);
2574 break;
2575 case 4:
2576 old_value = kvm_read_cr4(vcpu);
2577 svm_set_cr4(vcpu, new_value);
2578
2579 kvm_post_set_cr4(vcpu, old_value, new_value);
2580 break;
2581 case 8:
2582 ret = kvm_set_cr8(vcpu, new_value);
2583 break;
2584 default:
2585 WARN(1, "unhandled CR%d write trap", cr);
2586 kvm_queue_exception(vcpu, UD_VECTOR);
2587 return 1;
2588 }
2589
2590 return kvm_complete_insn_gp(vcpu, ret);
2591}
2592
2593static int dr_interception(struct kvm_vcpu *vcpu)
2594{
2595 struct vcpu_svm *svm = to_svm(vcpu);
2596 int reg, dr;
2597 unsigned long val;
2598 int err = 0;
2599
2600 if (vcpu->guest_debug == 0) {
2601		/*
2602		 * No more DR vmexits; force a reload of the debug registers
2603		 * and reenter on this instruction.  The next vmexit will
2604		 * retrieve the full state of the debug registers.
2605		 */
2606 clr_dr_intercepts(svm);
2607 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
2608 return 1;
2609 }
2610
2611 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2612 return emulate_on_interception(vcpu);
2613
2614 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2615 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2616 if (dr >= 16) {
2617 dr -= 16;
2618 val = kvm_register_read(vcpu, reg);
2619 err = kvm_set_dr(vcpu, dr, val);
2620 } else {
2621 kvm_get_dr(vcpu, dr, &val);
2622 kvm_register_write(vcpu, reg, val);
2623 }
2624
2625 return kvm_complete_insn_gp(vcpu, err);
2626}
2627
2628static int cr8_write_interception(struct kvm_vcpu *vcpu)
2629{
2630 int r;
2631
2632 u8 cr8_prev = kvm_get_cr8(vcpu);
2633
2634 r = cr_interception(vcpu);
2635 if (lapic_in_kernel(vcpu))
2636 return r;
2637 if (cr8_prev <= kvm_get_cr8(vcpu))
2638 return r;
2639 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
2640 return 0;
2641}
2642
2643static int efer_trap(struct kvm_vcpu *vcpu)
2644{
2645 struct msr_data msr_info;
2646 int ret;
2647
2648	/*
2649	 * Clear the EFER_SVME bit from EFER. The SVM code always sets this
2650	 * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
2651	 * whether the guest has X86_FEATURE_SVM - this avoids a failure if
2652	 * the guest doesn't have X86_FEATURE_SVM.
2653	 */
2654 msr_info.host_initiated = false;
2655 msr_info.index = MSR_EFER;
2656 msr_info.data = to_svm(vcpu)->vmcb->control.exit_info_1 & ~EFER_SVME;
2657 ret = kvm_set_msr_common(vcpu, &msr_info);
2658
2659 return kvm_complete_insn_gp(vcpu, ret);
2660}
2661
2662static int svm_get_msr_feature(struct kvm_msr_entry *msr)
2663{
2664 msr->data = 0;
2665
2666 switch (msr->index) {
2667 case MSR_F10H_DECFG:
2668 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
2669 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
2670 break;
2671 case MSR_IA32_PERF_CAPABILITIES:
2672 return 0;
2673 default:
2674 return KVM_MSR_RET_INVALID;
2675 }
2676
2677 return 0;
2678}
2679
2680static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2681{
2682 struct vcpu_svm *svm = to_svm(vcpu);
2683
2684 switch (msr_info->index) {
2685 case MSR_STAR:
2686 msr_info->data = svm->vmcb01.ptr->save.star;
2687 break;
2688#ifdef CONFIG_X86_64
2689 case MSR_LSTAR:
2690 msr_info->data = svm->vmcb01.ptr->save.lstar;
2691 break;
2692 case MSR_CSTAR:
2693 msr_info->data = svm->vmcb01.ptr->save.cstar;
2694 break;
2695 case MSR_KERNEL_GS_BASE:
2696 msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
2697 break;
2698 case MSR_SYSCALL_MASK:
2699 msr_info->data = svm->vmcb01.ptr->save.sfmask;
2700 break;
2701#endif
2702 case MSR_IA32_SYSENTER_CS:
2703 msr_info->data = svm->vmcb01.ptr->save.sysenter_cs;
2704 break;
2705 case MSR_IA32_SYSENTER_EIP:
2706 msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
2707 if (guest_cpuid_is_intel(vcpu))
2708 msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
2709 break;
2710 case MSR_IA32_SYSENTER_ESP:
2711 msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
2712 if (guest_cpuid_is_intel(vcpu))
2713 msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
2714 break;
2715 case MSR_TSC_AUX:
2716 msr_info->data = svm->tsc_aux;
2717 break;
2718	/*
2719	 * The debug control and last-branch-record MSRs below are read
2720	 * straight out of the current VMCB's save area; hardware keeps
2721	 * them up to date when LBR virtualization is in use.
2722	 */
2723 case MSR_IA32_DEBUGCTLMSR:
2724 msr_info->data = svm->vmcb->save.dbgctl;
2725 break;
2726 case MSR_IA32_LASTBRANCHFROMIP:
2727 msr_info->data = svm->vmcb->save.br_from;
2728 break;
2729 case MSR_IA32_LASTBRANCHTOIP:
2730 msr_info->data = svm->vmcb->save.br_to;
2731 break;
2732 case MSR_IA32_LASTINTFROMIP:
2733 msr_info->data = svm->vmcb->save.last_excp_from;
2734 break;
2735 case MSR_IA32_LASTINTTOIP:
2736 msr_info->data = svm->vmcb->save.last_excp_to;
2737 break;
2738 case MSR_VM_HSAVE_PA:
2739 msr_info->data = svm->nested.hsave_msr;
2740 break;
2741 case MSR_VM_CR:
2742 msr_info->data = svm->nested.vm_cr_msr;
2743 break;
2744 case MSR_IA32_SPEC_CTRL:
2745 if (!msr_info->host_initiated &&
2746 !guest_has_spec_ctrl_msr(vcpu))
2747 return 1;
2748
2749 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
2750 msr_info->data = svm->vmcb->save.spec_ctrl;
2751 else
2752 msr_info->data = svm->spec_ctrl;
2753 break;
2754 case MSR_AMD64_VIRT_SPEC_CTRL:
2755 if (!msr_info->host_initiated &&
2756 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2757 return 1;
2758
2759 msr_info->data = svm->virt_spec_ctrl;
2760 break;
2761 case MSR_F15H_IC_CFG: {
2762
2763 int family, model;
2764
2765 family = guest_cpuid_family(vcpu);
2766 model = guest_cpuid_model(vcpu);
2767
2768 if (family < 0 || model < 0)
2769 return kvm_get_msr_common(vcpu, msr_info);
2770
2771 msr_info->data = 0;
2772
2773 if (family == 0x15 &&
2774 (model >= 0x2 && model < 0x20))
2775 msr_info->data = 0x1E;
2776 }
2777 break;
2778 case MSR_F10H_DECFG:
2779 msr_info->data = svm->msr_decfg;
2780 break;
2781 default:
2782 return kvm_get_msr_common(vcpu, msr_info);
2783 }
2784 return 0;
2785}
2786
2787static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
2788{
2789 struct vcpu_svm *svm = to_svm(vcpu);
2790 if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
2791 return kvm_complete_insn_gp(vcpu, err);
2792
2793 ghcb_set_sw_exit_info_1(svm->ghcb, 1);
2794 ghcb_set_sw_exit_info_2(svm->ghcb,
2795 X86_TRAP_GP |
2796 SVM_EVTINJ_TYPE_EXEPT |
2797 SVM_EVTINJ_VALID);
2798 return 1;
2799}
2800
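/*
 * Guest writes to MSR_VM_CR: once SVM_DIS is latched, the SVM_LOCK and
 * SVM_DIS bits become read-only, and a write that leaves SVM_DIS set while
 * the guest still has EFER.SVME enabled is rejected.
 */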
2801static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2802{
2803 struct vcpu_svm *svm = to_svm(vcpu);
2804 int svm_dis, chg_mask;
2805
2806 if (data & ~SVM_VM_CR_VALID_MASK)
2807 return 1;
2808
2809 chg_mask = SVM_VM_CR_VALID_MASK;
2810
2811 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2812 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2813
2814 svm->nested.vm_cr_msr &= ~chg_mask;
2815 svm->nested.vm_cr_msr |= (data & chg_mask);
2816
2817 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2818
2819
2820 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2821 return 1;
2822
2823 return 0;
2824}
2825
2826static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2827{
2828 struct vcpu_svm *svm = to_svm(vcpu);
2829 int r;
2830
2831 u32 ecx = msr->index;
2832 u64 data = msr->data;
2833 switch (ecx) {
2834 case MSR_IA32_CR_PAT:
2835 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
2836 return 1;
2837 vcpu->arch.pat = data;
2838 svm->vmcb01.ptr->save.g_pat = data;
2839 if (is_guest_mode(vcpu))
2840 nested_vmcb02_compute_g_pat(svm);
2841 vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
2842 break;
2843 case MSR_IA32_SPEC_CTRL:
2844 if (!msr->host_initiated &&
2845 !guest_has_spec_ctrl_msr(vcpu))
2846 return 1;
2847
2848 if (kvm_spec_ctrl_test_value(data))
2849 return 1;
2850
2851 if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
2852 svm->vmcb->save.spec_ctrl = data;
2853 else
2854 svm->spec_ctrl = data;
2855 if (!data)
2856 break;
2857
2858		/*
2859		 * For non-nested:
2860		 * When it's written (to non-zero) for the first time, pass
2861		 * it through.
2862		 *
2863		 * For nested:
2864		 * The handling of the MSR bitmap for L2 guests is done in
2865		 * nested_svm_vmrun_msrpm.
2866		 * We update the L1 MSR bit as well since it will end up
2867		 * touching the MSR anyway now.
2868		 */
2869 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
2870 break;
2871 case MSR_IA32_PRED_CMD:
2872 if (!msr->host_initiated &&
2873 !guest_has_pred_cmd_msr(vcpu))
2874 return 1;
2875
2876 if (data & ~PRED_CMD_IBPB)
2877 return 1;
2878 if (!boot_cpu_has(X86_FEATURE_IBPB))
2879 return 1;
2880 if (!data)
2881 break;
2882
2883 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2884 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
2885 break;
2886 case MSR_AMD64_VIRT_SPEC_CTRL:
2887 if (!msr->host_initiated &&
2888 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2889 return 1;
2890
2891 if (data & ~SPEC_CTRL_SSBD)
2892 return 1;
2893
2894 svm->virt_spec_ctrl = data;
2895 break;
2896 case MSR_STAR:
2897 svm->vmcb01.ptr->save.star = data;
2898 break;
2899#ifdef CONFIG_X86_64
2900 case MSR_LSTAR:
2901 svm->vmcb01.ptr->save.lstar = data;
2902 break;
2903 case MSR_CSTAR:
2904 svm->vmcb01.ptr->save.cstar = data;
2905 break;
2906 case MSR_KERNEL_GS_BASE:
2907 svm->vmcb01.ptr->save.kernel_gs_base = data;
2908 break;
2909 case MSR_SYSCALL_MASK:
2910 svm->vmcb01.ptr->save.sfmask = data;
2911 break;
2912#endif
2913 case MSR_IA32_SYSENTER_CS:
2914 svm->vmcb01.ptr->save.sysenter_cs = data;
2915 break;
2916 case MSR_IA32_SYSENTER_EIP:
2917 svm->vmcb01.ptr->save.sysenter_eip = (u32)data;
2918		/*
2919		 * We only intercept the MSR_IA32_SYSENTER_{EIP|ESP} msrs
2920		 * when we spoof an Intel vendor ID (for cross vendor
2921		 * migration).  In this case we use this intercept to
2922		 * track the high 32 bits separately, since the VMCB only
2923		 * holds the low 32 bits of these MSRs on AMD.
2924		 */
2925 svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
2926 break;
2927 case MSR_IA32_SYSENTER_ESP:
2928 svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
2929 svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
2930 break;
2931 case MSR_TSC_AUX:
2932		/*
2933		 * TSC_AUX is usually changed only during boot and never read
2934		 * directly.  Intercept TSC_AUX instead of exposing it to the
2935		 * guest via direct_access_msrs, and switch it via user return.
2936		 */
2937 preempt_disable();
2938 r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
2939 preempt_enable();
2940 if (r)
2941 return 1;
2942
2943 svm->tsc_aux = data;
2944 break;
2945 case MSR_IA32_DEBUGCTLMSR:
2946 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
2947 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
2948 __func__, data);
2949 break;
2950 }
2951 if (data & DEBUGCTL_RESERVED_BITS)
2952 return 1;
2953
2954 svm->vmcb->save.dbgctl = data;
2955 vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
2956 if (data & (1ULL<<0))
2957 svm_enable_lbrv(vcpu);
2958 else
2959 svm_disable_lbrv(vcpu);
2960 break;
2961 case MSR_VM_HSAVE_PA:
2962		/*
2963		 * Old kernels did not validate the value written to
2964		 * MSR_VM_HSAVE_PA.  Allow KVM_SET_MSR to set an invalid
2965		 * value to allow live migrating buggy or malicious guests
2966		 * originating from those kernels.
2967		 */
2968 if (!msr->host_initiated && !page_address_valid(vcpu, data))
2969 return 1;
2970
2971 svm->nested.hsave_msr = data & PAGE_MASK;
2972 break;
2973 case MSR_VM_CR:
2974 return svm_set_vm_cr(vcpu, data);
2975 case MSR_VM_IGNNE:
2976 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2977 break;
2978 case MSR_F10H_DECFG: {
2979 struct kvm_msr_entry msr_entry;
2980
2981 msr_entry.index = msr->index;
2982 if (svm_get_msr_feature(&msr_entry))
2983 return 1;
2984
2985		/* Check the supported bits */
2986 if (data & ~msr_entry.data)
2987 return 1;
2988
2989		/* Don't allow the guest to change a bit, #GP */
2990 if (!msr->host_initiated && (data ^ msr_entry.data))
2991 return 1;
2992
2993 svm->msr_decfg = data;
2994 break;
2995 }
2996 case MSR_IA32_APICBASE:
2997 if (kvm_vcpu_apicv_active(vcpu))
2998 avic_update_vapic_bar(to_svm(vcpu), data);
2999 fallthrough;
3000 default:
3001 return kvm_set_msr_common(vcpu, msr);
3002 }
3003 return 0;
3004}
3005
3006static int msr_interception(struct kvm_vcpu *vcpu)
3007{
3008 if (to_svm(vcpu)->vmcb->control.exit_info_1)
3009 return kvm_emulate_wrmsr(vcpu);
3010 else
3011 return kvm_emulate_rdmsr(vcpu);
3012}
3013
3014static int interrupt_window_interception(struct kvm_vcpu *vcpu)
3015{
3016 kvm_make_request(KVM_REQ_EVENT, vcpu);
3017 svm_clear_vintr(to_svm(vcpu));
3018
3019	/*
3020	 * For AVIC, the only reason to end up here is ExtINTs.
3021	 * In this case AVIC was temporarily disabled for
3022	 * requesting the IRQ window and we have to re-enable it.
3023	 */
3024 svm_toggle_avic_for_irq_window(vcpu, true);
3025
3026 ++vcpu->stat.irq_window_exits;
3027 return 1;
3028}
3029
3030static int pause_interception(struct kvm_vcpu *vcpu)
3031{
3032 bool in_kernel;
3033
3034	/*
3035	 * CPL is not made available for an SEV-ES guest, therefore
3036	 * vcpu->arch.preempted_in_kernel can never be true.  Just
3037	 * set in_kernel to false as well.
3038	 */
3039 in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
3040
3041 if (!kvm_pause_in_guest(vcpu->kvm))
3042 grow_ple_window(vcpu);
3043
3044 kvm_vcpu_on_spin(vcpu, in_kernel);
3045 return kvm_skip_emulated_instruction(vcpu);
3046}
3047
3048static int invpcid_interception(struct kvm_vcpu *vcpu)
3049{
3050 struct vcpu_svm *svm = to_svm(vcpu);
3051 unsigned long type;
3052 gva_t gva;
3053
3054 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
3055 kvm_queue_exception(vcpu, UD_VECTOR);
3056 return 1;
3057 }
3058
3059	/*
3060	 * For an INVPCID intercept:
3061	 * EXITINFO1 provides the linear address of the memory operand.
3062	 * EXITINFO2 provides the contents of the register operand.
3063	 */
3064 type = svm->vmcb->control.exit_info_2;
3065 gva = svm->vmcb->control.exit_info_1;
3066
3067 if (type > 3) {
3068 kvm_inject_gp(vcpu, 0);
3069 return 1;
3070 }
3071
3072 return kvm_handle_invpcid(vcpu, type, gva);
3073}
3074
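/*
 * Exit-code dispatch table.  The exit code taken from the VMCB indexes
 * directly into this array; codes without a handler are reported as an
 * unexpected exit by svm_handle_invalid_exit().
 */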
3075static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3076 [SVM_EXIT_READ_CR0] = cr_interception,
3077 [SVM_EXIT_READ_CR3] = cr_interception,
3078 [SVM_EXIT_READ_CR4] = cr_interception,
3079 [SVM_EXIT_READ_CR8] = cr_interception,
3080 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
3081 [SVM_EXIT_WRITE_CR0] = cr_interception,
3082 [SVM_EXIT_WRITE_CR3] = cr_interception,
3083 [SVM_EXIT_WRITE_CR4] = cr_interception,
3084 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
3085 [SVM_EXIT_READ_DR0] = dr_interception,
3086 [SVM_EXIT_READ_DR1] = dr_interception,
3087 [SVM_EXIT_READ_DR2] = dr_interception,
3088 [SVM_EXIT_READ_DR3] = dr_interception,
3089 [SVM_EXIT_READ_DR4] = dr_interception,
3090 [SVM_EXIT_READ_DR5] = dr_interception,
3091 [SVM_EXIT_READ_DR6] = dr_interception,
3092 [SVM_EXIT_READ_DR7] = dr_interception,
3093 [SVM_EXIT_WRITE_DR0] = dr_interception,
3094 [SVM_EXIT_WRITE_DR1] = dr_interception,
3095 [SVM_EXIT_WRITE_DR2] = dr_interception,
3096 [SVM_EXIT_WRITE_DR3] = dr_interception,
3097 [SVM_EXIT_WRITE_DR4] = dr_interception,
3098 [SVM_EXIT_WRITE_DR5] = dr_interception,
3099 [SVM_EXIT_WRITE_DR6] = dr_interception,
3100 [SVM_EXIT_WRITE_DR7] = dr_interception,
3101 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
3102 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
3103 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
3104 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
3105 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
3106 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
3107 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
3108 [SVM_EXIT_INTR] = intr_interception,
3109 [SVM_EXIT_NMI] = nmi_interception,
3110 [SVM_EXIT_SMI] = smi_interception,
3111 [SVM_EXIT_VINTR] = interrupt_window_interception,
3112 [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
3113 [SVM_EXIT_CPUID] = kvm_emulate_cpuid,
3114 [SVM_EXIT_IRET] = iret_interception,
3115 [SVM_EXIT_INVD] = kvm_emulate_invd,
3116 [SVM_EXIT_PAUSE] = pause_interception,
3117 [SVM_EXIT_HLT] = kvm_emulate_halt,
3118 [SVM_EXIT_INVLPG] = invlpg_interception,
3119 [SVM_EXIT_INVLPGA] = invlpga_interception,
3120 [SVM_EXIT_IOIO] = io_interception,
3121 [SVM_EXIT_MSR] = msr_interception,
3122 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
3123 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3124 [SVM_EXIT_VMRUN] = vmrun_interception,
3125 [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
3126 [SVM_EXIT_VMLOAD] = vmload_interception,
3127 [SVM_EXIT_VMSAVE] = vmsave_interception,
3128 [SVM_EXIT_STGI] = stgi_interception,
3129 [SVM_EXIT_CLGI] = clgi_interception,
3130 [SVM_EXIT_SKINIT] = skinit_interception,
3131 [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
3132 [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
3133 [SVM_EXIT_MONITOR] = kvm_emulate_monitor,
3134 [SVM_EXIT_MWAIT] = kvm_emulate_mwait,
3135 [SVM_EXIT_XSETBV] = kvm_emulate_xsetbv,
3136 [SVM_EXIT_RDPRU] = kvm_handle_invalid_op,
3137 [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap,
3138 [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap,
3139 [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
3140 [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
3141 [SVM_EXIT_INVPCID] = invpcid_interception,
3142 [SVM_EXIT_NPF] = npf_interception,
3143 [SVM_EXIT_RSM] = rsm_interception,
3144 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
3145 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
3146 [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit,
3147};
3148
3149static void dump_vmcb(struct kvm_vcpu *vcpu)
3150{
3151 struct vcpu_svm *svm = to_svm(vcpu);
3152 struct vmcb_control_area *control = &svm->vmcb->control;
3153 struct vmcb_save_area *save = &svm->vmcb->save;
3154 struct vmcb_save_area *save01 = &svm->vmcb01.ptr->save;
3155
3156 if (!dump_invalid_vmcb) {
3157 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
3158 return;
3159 }
3160
3161 pr_err("VMCB %p, last attempted VMRUN on CPU %d\n",
3162 svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
3163 pr_err("VMCB Control Area:\n");
3164 pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
3165 pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
3166 pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff);
3167 pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16);
3168 pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]);
3169 pr_err("%-20s%08x %08x\n", "intercepts:",
3170 control->intercepts[INTERCEPT_WORD3],
3171 control->intercepts[INTERCEPT_WORD4]);
3172 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3173 pr_err("%-20s%d\n", "pause filter threshold:",
3174 control->pause_filter_thresh);
3175 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3176 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3177 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3178 pr_err("%-20s%d\n", "asid:", control->asid);
3179 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3180 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3181 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3182 pr_err("%-20s%08x\n", "int_state:", control->int_state);
3183 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3184 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3185 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3186 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3187 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3188 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3189 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3190 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
3191 pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
3192 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3193 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3194 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
3195 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3196 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
3197 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
3198 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
3199 pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa);
3200 pr_err("VMCB State Save Area:\n");
3201 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3202 "es:",
3203 save->es.selector, save->es.attrib,
3204 save->es.limit, save->es.base);
3205 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3206 "cs:",
3207 save->cs.selector, save->cs.attrib,
3208 save->cs.limit, save->cs.base);
3209 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3210 "ss:",
3211 save->ss.selector, save->ss.attrib,
3212 save->ss.limit, save->ss.base);
3213 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3214 "ds:",
3215 save->ds.selector, save->ds.attrib,
3216 save->ds.limit, save->ds.base);
3217 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3218 "fs:",
3219 save01->fs.selector, save01->fs.attrib,
3220 save01->fs.limit, save01->fs.base);
3221 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3222 "gs:",
3223 save01->gs.selector, save01->gs.attrib,
3224 save01->gs.limit, save01->gs.base);
3225 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3226 "gdtr:",
3227 save->gdtr.selector, save->gdtr.attrib,
3228 save->gdtr.limit, save->gdtr.base);
3229 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3230 "ldtr:",
3231 save01->ldtr.selector, save01->ldtr.attrib,
3232 save01->ldtr.limit, save01->ldtr.base);
3233 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3234 "idtr:",
3235 save->idtr.selector, save->idtr.attrib,
3236 save->idtr.limit, save->idtr.base);
3237 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3238 "tr:",
3239 save01->tr.selector, save01->tr.attrib,
3240 save01->tr.limit, save01->tr.base);
3241 pr_err("cpl: %d efer: %016llx\n",
3242 save->cpl, save->efer);
3243 pr_err("%-15s %016llx %-13s %016llx\n",
3244 "cr0:", save->cr0, "cr2:", save->cr2);
3245 pr_err("%-15s %016llx %-13s %016llx\n",
3246 "cr3:", save->cr3, "cr4:", save->cr4);
3247 pr_err("%-15s %016llx %-13s %016llx\n",
3248 "dr6:", save->dr6, "dr7:", save->dr7);
3249 pr_err("%-15s %016llx %-13s %016llx\n",
3250 "rip:", save->rip, "rflags:", save->rflags);
3251 pr_err("%-15s %016llx %-13s %016llx\n",
3252 "rsp:", save->rsp, "rax:", save->rax);
3253 pr_err("%-15s %016llx %-13s %016llx\n",
3254 "star:", save01->star, "lstar:", save01->lstar);
3255 pr_err("%-15s %016llx %-13s %016llx\n",
3256 "cstar:", save01->cstar, "sfmask:", save01->sfmask);
3257 pr_err("%-15s %016llx %-13s %016llx\n",
3258 "kernel_gs_base:", save01->kernel_gs_base,
3259 "sysenter_cs:", save01->sysenter_cs);
3260 pr_err("%-15s %016llx %-13s %016llx\n",
3261 "sysenter_esp:", save01->sysenter_esp,
3262 "sysenter_eip:", save01->sysenter_eip);
3263 pr_err("%-15s %016llx %-13s %016llx\n",
3264 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3265 pr_err("%-15s %016llx %-13s %016llx\n",
3266 "br_from:", save->br_from, "br_to:", save->br_to);
3267 pr_err("%-15s %016llx %-13s %016llx\n",
3268 "excp_from:", save->last_excp_from,
3269 "excp_to:", save->last_excp_to);
3270}
3271
3272static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
3273{
3274 if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
3275 svm_exit_handlers[exit_code])
3276 return 0;
3277
3278 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
3279 dump_vmcb(vcpu);
3280 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3281 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
3282 vcpu->run->internal.ndata = 2;
3283 vcpu->run->internal.data[0] = exit_code;
3284 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
3285
3286 return -EINVAL;
3287}
3288
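/*
 * Dispatch a validated exit code to its handler.  With retpolines enabled
 * the hottest exits are checked explicitly to avoid the cost of an indirect
 * call through svm_exit_handlers[].
 */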
3289int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
3290{
3291 if (svm_handle_invalid_exit(vcpu, exit_code))
3292 return 0;
3293
3294#ifdef CONFIG_RETPOLINE
3295 if (exit_code == SVM_EXIT_MSR)
3296 return msr_interception(vcpu);
3297 else if (exit_code == SVM_EXIT_VINTR)
3298 return interrupt_window_interception(vcpu);
3299 else if (exit_code == SVM_EXIT_INTR)
3300 return intr_interception(vcpu);
3301 else if (exit_code == SVM_EXIT_HLT)
3302 return kvm_emulate_halt(vcpu);
3303 else if (exit_code == SVM_EXIT_NPF)
3304 return npf_interception(vcpu);
3305#endif
3306 return svm_exit_handlers[exit_code](vcpu);
3307}
3308
3309static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
3310 u32 *intr_info, u32 *error_code)
3311{
3312 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3313
3314 *info1 = control->exit_info_1;
3315 *info2 = control->exit_info_2;
3316 *intr_info = control->exit_int_info;
3317 if ((*intr_info & SVM_EXITINTINFO_VALID) &&
3318 (*intr_info & SVM_EXITINTINFO_VALID_ERR))
3319 *error_code = control->exit_int_info_err;
3320 else
3321 *error_code = 0;
3322}
3323
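/*
 * Top-level #VMEXIT handling: sync CR0/CR3 back from the VMCB (except for
 * SEV-ES guests), give a nested (L1) hypervisor the chance to claim the
 * exit, bail out on SVM_EXIT_ERR, and finally dispatch to the per-exit
 * handler unless a fastpath already completed the work.
 */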
3324static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
3325{
3326 struct vcpu_svm *svm = to_svm(vcpu);
3327 struct kvm_run *kvm_run = vcpu->run;
3328 u32 exit_code = svm->vmcb->control.exit_code;
3329
3330 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
3331
3332	/* SEV-ES guests must use the CR write traps to track CR registers. */
3333 if (!sev_es_guest(vcpu->kvm)) {
3334 if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
3335 vcpu->arch.cr0 = svm->vmcb->save.cr0;
3336 if (npt_enabled)
3337 vcpu->arch.cr3 = svm->vmcb->save.cr3;
3338 }
3339
3340 if (is_guest_mode(vcpu)) {
3341 int vmexit;
3342
3343 trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
3344
3345 vmexit = nested_svm_exit_special(svm);
3346
3347 if (vmexit == NESTED_EXIT_CONTINUE)
3348 vmexit = nested_svm_exit_handled(svm);
3349
3350 if (vmexit == NESTED_EXIT_DONE)
3351 return 1;
3352 }
3353
3354 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3355 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3356 kvm_run->fail_entry.hardware_entry_failure_reason
3357 = svm->vmcb->control.exit_code;
3358 kvm_run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
3359 dump_vmcb(vcpu);
3360 return 0;
3361 }
3362
3363 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3364 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3365 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3366 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3367 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
3368 "exit_code 0x%x\n",
3369 __func__, svm->vmcb->control.exit_int_info,
3370 exit_code);
3371
3372 if (exit_fastpath != EXIT_FASTPATH_NONE)
3373 return 1;
3374
3375 return svm_invoke_exit_handler(vcpu, exit_code);
3376}
3377
3378static void reload_tss(struct kvm_vcpu *vcpu)
3379{
3380 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3381
3382 sd->tss_desc->type = 9;
3383 load_TR_desc();
3384}
3385
3386static void pre_svm_run(struct kvm_vcpu *vcpu)
3387{
3388 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3389 struct vcpu_svm *svm = to_svm(vcpu);
3390
3391	/*
3392	 * If the previous vmrun of the vmcb occurred on a different physical
3393	 * cpu, then mark the vmcb dirty and assign a new asid.  Hardware's
3394	 * vmcb clean bits are per logical CPU, as are KVM's asid assignments.
3395	 */
3396 if (unlikely(svm->current_vmcb->cpu != vcpu->cpu)) {
3397 svm->current_vmcb->asid_generation = 0;
3398 vmcb_mark_all_dirty(svm->vmcb);
3399 svm->current_vmcb->cpu = vcpu->cpu;
3400 }
3401
3402 if (sev_guest(vcpu->kvm))
3403 return pre_sev_run(svm, vcpu->cpu);
3404
3405	/* FIXME: handle wraparound of asid_generation */
3406 if (svm->current_vmcb->asid_generation != sd->asid_generation)
3407 new_asid(svm, sd);
3408}
3409
3410static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3411{
3412 struct vcpu_svm *svm = to_svm(vcpu);
3413
3414 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3415 vcpu->arch.hflags |= HF_NMI_MASK;
3416 if (!sev_es_guest(vcpu->kvm))
3417 svm_set_intercept(svm, INTERCEPT_IRET);
3418 ++vcpu->stat.nmi_injections;
3419}
3420
3421static void svm_set_irq(struct kvm_vcpu *vcpu)
3422{
3423 struct vcpu_svm *svm = to_svm(vcpu);
3424
3425 BUG_ON(!(gif_set(svm)));
3426
3427 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3428 ++vcpu->stat.irq_injections;
3429
3430 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3431 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3432}
3433
3434static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3435{
3436 struct vcpu_svm *svm = to_svm(vcpu);
3437
3438	/*
3439	 * SEV-ES guests must always keep the CR intercepts cleared. CR
3440	 * tracking is done using the CR write traps.
3441	 */
3442 if (sev_es_guest(vcpu->kvm))
3443 return;
3444
3445 if (nested_svm_virtualize_tpr(vcpu))
3446 return;
3447
3448 svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
3449
3450 if (irr == -1)
3451 return;
3452
3453 if (tpr >= irr)
3454 svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
3455}
3456
3457bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
3458{
3459 struct vcpu_svm *svm = to_svm(vcpu);
3460 struct vmcb *vmcb = svm->vmcb;
3461 bool ret;
3462
3463 if (!gif_set(svm))
3464 return true;
3465
3466 if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3467 return false;
3468
3469 ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
3470 (vcpu->arch.hflags & HF_NMI_MASK);
3471
3472 return ret;
3473}
3474
3475static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3476{
3477 struct vcpu_svm *svm = to_svm(vcpu);
3478 if (svm->nested.nested_run_pending)
3479 return -EBUSY;
3480
3481	/* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */
3482 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3483 return -EBUSY;
3484
3485 return !svm_nmi_blocked(vcpu);
3486}
3487
3488static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3489{
3490 return !!(vcpu->arch.hflags & HF_NMI_MASK);
3491}
3492
3493static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3494{
3495 struct vcpu_svm *svm = to_svm(vcpu);
3496
3497 if (masked) {
3498 vcpu->arch.hflags |= HF_NMI_MASK;
3499 if (!sev_es_guest(vcpu->kvm))
3500 svm_set_intercept(svm, INTERCEPT_IRET);
3501 } else {
3502 vcpu->arch.hflags &= ~HF_NMI_MASK;
3503 if (!sev_es_guest(vcpu->kvm))
3504 svm_clr_intercept(svm, INTERCEPT_IRET);
3505 }
3506}
3507
3508bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
3509{
3510 struct vcpu_svm *svm = to_svm(vcpu);
3511 struct vmcb *vmcb = svm->vmcb;
3512
3513 if (!gif_set(svm))
3514 return true;
3515
3516 if (sev_es_guest(vcpu->kvm)) {
3517		/*
3518		 * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt
3519		 * mask bit to determine the state of the IF flag.
3520		 */
3521 if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
3522 return true;
3523 } else if (is_guest_mode(vcpu)) {
3524
3525 if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
3526 ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
3527 : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
3528 return true;
3529
3530
3531 if (nested_exit_on_intr(svm))
3532 return false;
3533 } else {
3534 if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
3535 return true;
3536 }
3537
3538 return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
3539}
3540
3541static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3542{
3543 struct vcpu_svm *svm = to_svm(vcpu);
3544 if (svm->nested.nested_run_pending)
3545 return -EBUSY;
3546
3547	/*
3548	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
3549	 * e.g. if the IRQ arrived asynchronously after checking nested events.
3550	 */
3551 if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
3552 return -EBUSY;
3553
3554 return !svm_interrupt_blocked(vcpu);
3555}
3556
3557static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
3558{
3559 struct vcpu_svm *svm = to_svm(vcpu);
3560
3561	/*
3562	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3563	 * 1, because that's a separate STGI/VMRUN intercept.  The next time
3564	 * we get that intercept, this function will be called again though and
3565	 * we'll get the vintr intercept. However, if the vGIF feature is
3566	 * enabled, the STGI interception will not occur. Enable the irq
3567	 * window under the assumption that the hardware will set the GIF.
3568	 */
3569 if (vgif_enabled(svm) || gif_set(svm)) {
3570		/*
3571		 * IRQ window is not needed when AVIC is enabled,
3572		 * unless we have pending ExtINT since it cannot be injected
3573		 * via AVIC. In such case, we need to temporarily disable AVIC,
3574		 * and fallback to injecting IRQ via V_IRQ.
3575		 */
3576 svm_toggle_avic_for_irq_window(vcpu, false);
3577 svm_set_vintr(svm);
3578 }
3579}
3580
3581static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
3582{
3583 struct vcpu_svm *svm = to_svm(vcpu);
3584
3585 if ((vcpu->arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) == HF_NMI_MASK)
3586 return;
3587
3588 if (!gif_set(svm)) {
3589 if (vgif_enabled(svm))
3590 svm_set_intercept(svm, INTERCEPT_STGI);
3591 return;
3592 }
3593
3594	/*
3595	 * Something prevents NMI from being injected. Single step over the
3596	 * possible problem (IRET, exception injection or interrupt shadow).
3597	 */
3598 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
3599 svm->nmi_singlestep = true;
3600 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3601}
3602
3603static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3604{
3605 return 0;
3606}
3607
3608static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
3609{
3610 return 0;
3611}
3612
3613void svm_flush_tlb(struct kvm_vcpu *vcpu)
3614{
3615 struct vcpu_svm *svm = to_svm(vcpu);
3616
3617	/*
3618	 * Flush only the current ASID even if the TLB flush was invoked via
3619	 * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
3620	 * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
3621	 * unconditionally does a TLB flush on both nested VM-Enter and nested
3622	 * VM-Exit (via kvm_mmu_reset_context()).
3623	 */
3624 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3625 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3626 else
3627 svm->current_vmcb->asid_generation--;
3628}
3629
3630static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
3631{
3632 struct vcpu_svm *svm = to_svm(vcpu);
3633
3634 invlpga(gva, svm->vmcb->control.asid);
3635}
3636
3637static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3638{
3639 struct vcpu_svm *svm = to_svm(vcpu);
3640
3641 if (nested_svm_virtualize_tpr(vcpu))
3642 return;
3643
3644 if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) {
3645 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3646 kvm_set_cr8(vcpu, cr8);
3647 }
3648}
3649
3650static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3651{
3652 struct vcpu_svm *svm = to_svm(vcpu);
3653 u64 cr8;
3654
3655 if (nested_svm_virtualize_tpr(vcpu) ||
3656 kvm_vcpu_apicv_active(vcpu))
3657 return;
3658
3659 cr8 = kvm_get_cr8(vcpu);
3660 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3661 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3662}
3663
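/*
 * After a #VMEXIT, exit_int_info describes an event whose delivery was cut
 * short by the exit.  Re-queue it (NMI, exception with optional error code,
 * or external interrupt) so it is re-injected on the next entry; software
 * exceptions are re-executed instead, rewinding RIP for an emulated INT3.
 */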
3664static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
3665{
3666 struct vcpu_svm *svm = to_svm(vcpu);
3667 u8 vector;
3668 int type;
3669 u32 exitintinfo = svm->vmcb->control.exit_int_info;
3670 unsigned int3_injected = svm->int3_injected;
3671
3672 svm->int3_injected = 0;
3673
3674	/*
3675	 * If we've made progress since setting HF_IRET_MASK, we've
3676	 * executed an IRET and can allow NMI injection.
3677	 */
3678 if ((vcpu->arch.hflags & HF_IRET_MASK) &&
3679 (sev_es_guest(vcpu->kvm) ||
3680 kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
3681 vcpu->arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3682 kvm_make_request(KVM_REQ_EVENT, vcpu);
3683 }
3684
3685 vcpu->arch.nmi_injected = false;
3686 kvm_clear_exception_queue(vcpu);
3687 kvm_clear_interrupt_queue(vcpu);
3688
3689 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3690 return;
3691
3692 kvm_make_request(KVM_REQ_EVENT, vcpu);
3693
3694 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3695 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3696
3697 switch (type) {
3698 case SVM_EXITINTINFO_TYPE_NMI:
3699 vcpu->arch.nmi_injected = true;
3700 break;
3701 case SVM_EXITINTINFO_TYPE_EXEPT:
3702		/*
3703		 * Never re-inject a #VC exception.
3704		 */
3705 if (vector == X86_TRAP_VC)
3706 break;
3707
3708		/*
3709		 * In case of software exceptions, do not reinject the vector,
3710		 * but re-execute the instruction instead. Rewind RIP first
3711		 * if we emulated INT3 before.
3712		 */
3713 if (kvm_exception_is_soft(vector)) {
3714 if (vector == BP_VECTOR && int3_injected &&
3715 kvm_is_linear_rip(vcpu, svm->int3_rip))
3716 kvm_rip_write(vcpu,
3717 kvm_rip_read(vcpu) - int3_injected);
3718 break;
3719 }
3720 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3721 u32 err = svm->vmcb->control.exit_int_info_err;
3722 kvm_requeue_exception_e(vcpu, vector, err);
3723
3724 } else
3725 kvm_requeue_exception(vcpu, vector);
3726 break;
3727 case SVM_EXITINTINFO_TYPE_INTR:
3728 kvm_queue_interrupt(vcpu, vector, false);
3729 break;
3730 default:
3731 break;
3732 }
3733}
3734
3735static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3736{
3737 struct vcpu_svm *svm = to_svm(vcpu);
3738 struct vmcb_control_area *control = &svm->vmcb->control;
3739
3740 control->exit_int_info = control->event_inj;
3741 control->exit_int_info_err = control->event_inj_err;
3742 control->event_inj = 0;
3743 svm_complete_interrupts(vcpu);
3744}
3745
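/*
 * Fastpath: a WRMSR exit (exit_info_1 != 0 distinguishes WRMSR from RDMSR)
 * may be completed with interrupts still disabled, avoiding a full exit
 * handling round trip for hot MSRs.
 */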
3746static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
3747{
3748 if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
3749 to_svm(vcpu)->vmcb->control.exit_info_1)
3750 return handle_fastpath_set_msr_irqoff(vcpu);
3751
3752 return EXIT_FASTPATH_NONE;
3753}
3754
3755static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
3756{
3757 struct vcpu_svm *svm = to_svm(vcpu);
3758 unsigned long vmcb_pa = svm->current_vmcb->pa;
3759
3760 kvm_guest_enter_irqoff();
3761
3762 if (sev_es_guest(vcpu->kvm)) {
3763 __svm_sev_es_vcpu_run(vmcb_pa);
3764 } else {
3765 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
3766
3767		/*
3768		 * Use a single vmcb (vmcb01 because it's always valid) for
3769		 * context switching guest state via VMLOAD/VMSAVE, that way
3770		 * the state doesn't need to be copied between vmcb01 and
3771		 * vmcb02 when switching vmcbs for nested virtualization.
3772		 */
3773 vmload(svm->vmcb01.pa);
3774 __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
3775 vmsave(svm->vmcb01.pa);
3776
3777 vmload(__sme_page_pa(sd->save_area));
3778 }
3779
3780 kvm_guest_exit_irqoff();
3781}
3782
3783static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
3784{
3785 struct vcpu_svm *svm = to_svm(vcpu);
3786
3787 trace_kvm_entry(vcpu);
3788
3789 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3790 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3791 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3792
3793	/*
3794	 * Disable singlestep if we're injecting an interrupt/exception.
3795	 * We don't want our modified rflags to be pushed on the stack where
3796	 * we might not be able to easily reset them if we disabled NMI
3797	 * singlestep later.
3798	 */
3799 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
3800		/*
3801		 * Event injection happens before external interrupts cause a
3802		 * vmexit and interrupts are disabled here, so smp_send_reschedule
3803		 * is enough to force an immediate vmexit.
3804		 */
3805 disable_nmi_singlestep(svm);
3806 smp_send_reschedule(vcpu->cpu);
3807 }
3808
3809 pre_svm_run(vcpu);
3810
3811 sync_lapic_to_cr8(vcpu);
3812
3813 if (unlikely(svm->asid != svm->vmcb->control.asid)) {
3814 svm->vmcb->control.asid = svm->asid;
3815 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
3816 }
3817 svm->vmcb->save.cr2 = vcpu->arch.cr2;
3818
3819 svm_hv_update_vp_id(svm->vmcb, vcpu);
3820
3821	/*
3822	 * Run with all-zero DR6 unless needed, so that we can get the exact cause
3823	 * of a #DB.
3824	 */
3825 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
3826 svm_set_dr6(svm, vcpu->arch.dr6);
3827 else
3828 svm_set_dr6(svm, DR6_ACTIVE_LOW);
3829
3830 clgi();
3831 kvm_load_guest_xsave_state(vcpu);
3832
3833 kvm_wait_lapic_expire(vcpu);
3834
3835	/*
3836	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
3837	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
3838	 * is no need to worry about the conditional branch over the wrmsr
3839	 * being speculatively taken.
3840	 */
3841 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
3842 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
3843
3844 svm_vcpu_enter_exit(vcpu);
3845
3846	/*
3847	 * We do not use IBRS in the kernel. If this vCPU has used the
3848	 * SPEC_CTRL MSR it may have left it on; save the value and
3849	 * turn it off. This is much more efficient than blindly adding
3850	 * it to the atomic save/restore list. Especially as the former
3851	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
3852	 *
3853	 * For non-nested case:
3854	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
3855	 * save it.
3856	 *
3857	 * For nested case:
3858	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
3859	 * save it.
3860	 */
3861 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
3862 unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
3863 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
3864
3865 if (!sev_es_guest(vcpu->kvm))
3866 reload_tss(vcpu);
3867
3868 if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
3869 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
3870
3871 if (!sev_es_guest(vcpu->kvm)) {
3872 vcpu->arch.cr2 = svm->vmcb->save.cr2;
3873 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3874 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3875 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3876 }
3877
3878 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3879 kvm_before_interrupt(vcpu);
3880
3881 kvm_load_host_xsave_state(vcpu);
3882 stgi();
3883
3884
3885
3886 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3887 kvm_after_interrupt(vcpu);
3888
3889 sync_cr8_to_lapic(vcpu);
3890
3891 svm->next_rip = 0;
3892 if (is_guest_mode(vcpu)) {
3893 nested_sync_control_from_vmcb02(svm);
3894
3895		/* Track VMRUNs that have made it past consistency checking */
3896 if (svm->nested.nested_run_pending &&
3897 svm->vmcb->control.exit_code != SVM_EXIT_ERR)
3898 ++vcpu->stat.nested_run;
3899
3900 svm->nested.nested_run_pending = 0;
3901 }
3902
3903 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3904 vmcb_mark_all_clean(svm->vmcb);
3905
3906	/* if exit due to PF check for async PF */
3907 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3908 vcpu->arch.apf.host_apf_flags =
3909 kvm_read_and_reset_apf_flags();
3910
3911 if (npt_enabled)
3912 kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
3913
3914	/*
3915	 * We need to handle MC intercepts here before the vcpu has a chance
3916	 * to change the physical cpu
3917	 */
3918 if (unlikely(svm->vmcb->control.exit_code ==
3919 SVM_EXIT_EXCP_BASE + MC_VECTOR))
3920 svm_handle_mce(vcpu);
3921
3922 svm_complete_interrupts(vcpu);
3923
3924 if (is_guest_mode(vcpu))
3925 return EXIT_FASTPATH_NONE;
3926
3927 return svm_exit_handlers_fastpath(vcpu);
3928}
3929
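/*
 * Install a new MMU root.  With NPT the root goes into nested_cr3 and the
 * guest's own CR3 is written to the save area only if it is up to date;
 * with shadow paging the root HPA itself becomes the CR3 value.
 */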
3930static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
3931 int root_level)
3932{
3933 struct vcpu_svm *svm = to_svm(vcpu);
3934 unsigned long cr3;
3935
3936 if (npt_enabled) {
3937 svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
3938 vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
3939
3940 hv_track_root_tdp(vcpu, root_hpa);
3941
3942		/* Loading L2's CR3 is handled by enter_svm_guest_mode.  */
3943 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
3944 return;
3945 cr3 = vcpu->arch.cr3;
3946 } else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
3947 cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
3948 } else {
3949		/* PCID in the guest should be impossible with a 32-bit MMU. */
3950 WARN_ON_ONCE(kvm_get_active_pcid(vcpu));
3951 cr3 = root_hpa;
3952 }
3953
3954 svm->vmcb->save.cr3 = cr3;
3955 vmcb_mark_dirty(svm->vmcb, VMCB_CR);
3956}
3957
3958static int is_disabled(void)
3959{
3960 u64 vm_cr;
3961
3962 rdmsrl(MSR_VM_CR, vm_cr);
3963 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3964 return 1;
3965
3966 return 0;
3967}
3968
3969static void
3970svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3971{
3972	/*
3973	 * Patch in the VMMCALL instruction:
3974	 */
3975 hypercall[0] = 0x0f;
3976 hypercall[1] = 0x01;
3977 hypercall[2] = 0xd9;
3978}
3979
3980static int __init svm_check_processor_compat(void)
3981{
3982 return 0;
3983}
3984
3985static bool svm_cpu_has_accelerated_tpr(void)
3986{
3987 return false;
3988}
3989
3990/*
3991 * The kvm parameter can be NULL (module initialization, or invocation before
3992 * VM creation). Be sure to check the kvm parameter before using it.
3993 */
3994static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
3995{
3996 switch (index) {
3997 case MSR_IA32_MCG_EXT_CTL:
3998 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3999 return false;
4000 case MSR_IA32_SMBASE:
4001		/* SEV-ES guests do not support SMM, so report false */
4002 if (kvm && sev_es_guest(kvm))
4003 return false;
4004 break;
4005 default:
4006 break;
4007 }
4008
4009 return true;
4010}
4011
4012static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
4013{
4014 return 0;
4015}
4016
4017static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
4018{
4019 struct vcpu_svm *svm = to_svm(vcpu);
4020 struct kvm_cpuid_entry2 *best;
4021
4022 vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4023 boot_cpu_has(X86_FEATURE_XSAVE) &&
4024 boot_cpu_has(X86_FEATURE_XSAVES);
4025
4026	/* Update nrips enabled cache */
4027 svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
4028 guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
4029
4030 svm_recalc_instruction_intercepts(vcpu, svm);
4031
4032	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
4033 if (sev_guest(vcpu->kvm)) {
4034 best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
4035 if (best)
4036 vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
4037 }
4038
4039 if (kvm_vcpu_apicv_active(vcpu)) {
4040		/*
4041		 * AVIC does not work with an x2APIC mode guest. If the X2APIC
4042		 * feature is exposed to the guest, disable AVIC.
4043		 */
4044 if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
4045 kvm_request_apicv_update(vcpu->kvm, false,
4046 APICV_INHIBIT_REASON_X2APIC);
4047
4048		/*
4049		 * Currently, AVIC does not work with nested virtualization.
4050		 * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
4051		 */
4052 if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
4053 kvm_request_apicv_update(vcpu->kvm, false,
4054 APICV_INHIBIT_REASON_NESTED);
4055 }
4056
4057 if (guest_cpuid_is_intel(vcpu)) {
4058		/*
4059		 * We must intercept SYSENTER_EIP and SYSENTER_ESP
4060		 * accesses because the processor only stores 32 bits.
4061		 * For the same reason we cannot use virtual VMLOAD/VMSAVE.
4062		 */
4063 svm_set_intercept(svm, INTERCEPT_VMLOAD);
4064 svm_set_intercept(svm, INTERCEPT_VMSAVE);
4065 svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
4066
4067 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
4068 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
4069 } else {
4070		/*
4071		 * If hardware supports Virtual VMLOAD VMSAVE then enable it
4072		 * in VMCB and clear intercepts to avoid #VMEXIT.
4073		 */
4074 if (vls) {
4075 svm_clr_intercept(svm, INTERCEPT_VMLOAD);
4076 svm_clr_intercept(svm, INTERCEPT_VMSAVE);
4077 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
4078 }
4079
4080 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
4081 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
4082 }
4083}
4084
4085static bool svm_has_wbinvd_exit(void)
4086{
4087 return true;
4088}
4089
4090#define PRE_EX(exit) { .exit_code = (exit), \
4091 .stage = X86_ICPT_PRE_EXCEPT, }
4092#define POST_EX(exit) { .exit_code = (exit), \
4093 .stage = X86_ICPT_POST_EXCEPT, }
4094#define POST_MEM(exit) { .exit_code = (exit), \
4095 .stage = X86_ICPT_POST_MEMACCESS, }
4096
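/*
 * Mapping from the x86 emulator's intercept identifiers to SVM exit codes,
 * plus the emulation stage at which the corresponding nested intercept
 * check must be performed.
 */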
4097static const struct __x86_intercept {
4098 u32 exit_code;
4099 enum x86_intercept_stage stage;
4100} x86_intercept_map[] = {
4101 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
4102 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
4103 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
4104 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
4105 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
4106 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
4107 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
4108 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
4109 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
4110 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
4111 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
4112 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
4113 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
4114 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
4115 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
4116 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
4117 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
4118 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
4119 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
4120 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
4121 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
4122 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
4123 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
4124 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
4125 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
4126 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
4127 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
4128 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
4129 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
4130 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
4131 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
4132 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
4133 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
4134 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
4135 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
4136 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
4137 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
4138 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
4139 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
4140 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
4141 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
4142 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
4143 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
4144 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
4145 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
4146 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
4147 [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
4148};
4149
4150#undef PRE_EX
4151#undef POST_EX
4152#undef POST_MEM
4153
4154static int svm_check_intercept(struct kvm_vcpu *vcpu,
4155 struct x86_instruction_info *info,
4156 enum x86_intercept_stage stage,
4157 struct x86_exception *exception)
4158{
4159 struct vcpu_svm *svm = to_svm(vcpu);
4160 int vmexit, ret = X86EMUL_CONTINUE;
4161 struct __x86_intercept icpt_info;
4162 struct vmcb *vmcb = svm->vmcb;
4163
4164 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4165 goto out;
4166
4167 icpt_info = x86_intercept_map[info->intercept];
4168
4169 if (stage != icpt_info.stage)
4170 goto out;
4171
4172 switch (icpt_info.exit_code) {
4173 case SVM_EXIT_READ_CR0:
4174 if (info->intercept == x86_intercept_cr_read)
4175 icpt_info.exit_code += info->modrm_reg;
4176 break;
4177 case SVM_EXIT_WRITE_CR0: {
4178 unsigned long cr0, val;
4179
4180 if (info->intercept == x86_intercept_cr_write)
4181 icpt_info.exit_code += info->modrm_reg;
4182
4183 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
4184 info->intercept == x86_intercept_clts)
4185 break;
4186
4187 if (!(vmcb_is_intercept(&svm->nested.ctl,
4188 INTERCEPT_SELECTIVE_CR0)))
4189 break;
4190
4191 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4192 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
4193
4194 if (info->intercept == x86_intercept_lmsw) {
4195 cr0 &= 0xfUL;
4196 val &= 0xfUL;
4197
4198 if (cr0 & X86_CR0_PE)
4199 val |= X86_CR0_PE;
4200 }
4201
4202 if (cr0 ^ val)
4203 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4204
4205 break;
4206 }
4207 case SVM_EXIT_READ_DR0:
4208 case SVM_EXIT_WRITE_DR0:
4209 icpt_info.exit_code += info->modrm_reg;
4210 break;
4211 case SVM_EXIT_MSR:
4212 if (info->intercept == x86_intercept_wrmsr)
4213 vmcb->control.exit_info_1 = 1;
4214 else
4215 vmcb->control.exit_info_1 = 0;
4216 break;
4217 case SVM_EXIT_PAUSE:
4218		/*
4219		 * PAUSE is decoded as NOP with a REPE prefix; only treat it
4220		 * as intercepted when the prefix is actually present.
4221		 */
4222 if (info->rep_prefix != REPE_PREFIX)
4223 goto out;
4224 break;
4225 case SVM_EXIT_IOIO: {
4226 u64 exit_info;
4227 u32 bytes;
4228
4229 if (info->intercept == x86_intercept_in ||
4230 info->intercept == x86_intercept_ins) {
4231 exit_info = ((info->src_val & 0xffff) << 16) |
4232 SVM_IOIO_TYPE_MASK;
4233 bytes = info->dst_bytes;
4234 } else {
4235 exit_info = (info->dst_val & 0xffff) << 16;
4236 bytes = info->src_bytes;
4237 }
4238
4239 if (info->intercept == x86_intercept_outs ||
4240 info->intercept == x86_intercept_ins)
4241 exit_info |= SVM_IOIO_STR_MASK;
4242
4243 if (info->rep_prefix)
4244 exit_info |= SVM_IOIO_REP_MASK;
4245
4246 bytes = min(bytes, 4u);
4247
4248 exit_in