// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 */

#include <linux/kvm_host.h>
#include "irq.h"
#include "ioapic.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"
#include "lapic.h"
#include "xen.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <linux/entry-kvm.h>
#include <linux/suspend.h>

#include <trace/events/kvm.h>

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>

#define CREATE_TRACE_POINTS
#include "trace.h"
85
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define emul_to_vcpu(ctxt) \
	((struct kvm_vcpu *)(ctxt)->vcpu)

/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits =
	~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;

#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)

#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
				    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
111
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);

static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);

struct kvm_x86_ops kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

#define KVM_X86_OP(func)					     \
	DEFINE_STATIC_CALL_NULL(kvm_x86_##func,			     \
				*(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_NULL KVM_X86_OP
#include <asm/kvm-x86-ops.h>
EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current);
134
135static bool __read_mostly ignore_msrs = 0;
136module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
137
138bool __read_mostly report_ignored_msrs = true;
139module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
140EXPORT_SYMBOL_GPL(report_ignored_msrs);
141
142unsigned int min_timer_period_us = 200;
143module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
144
145static bool __read_mostly kvmclock_periodic_sync = true;
146module_param(kvmclock_periodic_sync, bool, S_IRUGO);
147
148bool __read_mostly kvm_has_tsc_control;
149EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
150u32 __read_mostly kvm_max_guest_tsc_khz;
151EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
152u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
153EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
154u64 __read_mostly kvm_max_tsc_scaling_ratio;
155EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
156u64 __read_mostly kvm_default_tsc_scaling_ratio;
157EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
158bool __read_mostly kvm_has_bus_lock_exit;
159EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
160
161
162static u32 __read_mostly tsc_tolerance_ppm = 250;
163module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

/*
 * LAPIC timer advance (tscdeadline mode only) in nanoseconds.  '-1' enables
 * adaptive, per-vCPU tuning of the advancement, '0' disables the advancement,
 * and any other value is used as a fixed advancement.
 */
static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
173
174static bool __read_mostly vector_hashing = true;
175module_param(vector_hashing, bool, S_IRUGO);
176
177bool __read_mostly enable_vmware_backdoor = false;
178module_param(enable_vmware_backdoor, bool, S_IRUGO);
179EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
180
181static bool __read_mostly force_emulation_prefix = false;
182module_param(force_emulation_prefix, bool, S_IRUGO);
183
184int __read_mostly pi_inject_timer = -1;
185module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);

/*
 * Restoring the host value for MSRs that are only consumed when running in
 * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
 * returns to userspace, i.e. the kernel can run with the guest's value.
 */
#define KVM_MAX_NR_USER_RETURN_MSRS 16
193
194struct kvm_user_return_msrs {
195 struct user_return_notifier urn;
196 bool registered;
197 struct kvm_user_return_msr_values {
198 u64 host;
199 u64 curr;
200 } values[KVM_MAX_NR_USER_RETURN_MSRS];
201};
202
203u32 __read_mostly kvm_nr_uret_msrs;
204EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
205static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
206static struct kvm_user_return_msrs __percpu *user_return_msrs;
207
208#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
209 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
210 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
211 | XFEATURE_MASK_PKRU)
212
213u64 __read_mostly host_efer;
214EXPORT_SYMBOL_GPL(host_efer);
215
216bool __read_mostly allow_smaller_maxphyaddr = 0;
217EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
218
219bool __read_mostly enable_apicv = true;
220EXPORT_SYMBOL_GPL(enable_apicv);
221
222u64 __read_mostly host_xss;
223EXPORT_SYMBOL_GPL(host_xss);
224u64 __read_mostly supported_xss;
225EXPORT_SYMBOL_GPL(supported_xss);
226
227const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
228 KVM_GENERIC_VM_STATS(),
229 STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
230 STATS_DESC_COUNTER(VM, mmu_pte_write),
231 STATS_DESC_COUNTER(VM, mmu_pde_zapped),
232 STATS_DESC_COUNTER(VM, mmu_flooded),
233 STATS_DESC_COUNTER(VM, mmu_recycled),
234 STATS_DESC_COUNTER(VM, mmu_cache_miss),
235 STATS_DESC_ICOUNTER(VM, mmu_unsync),
236 STATS_DESC_ICOUNTER(VM, lpages),
237 STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
238 STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
239};
240static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
241 sizeof(struct kvm_vm_stat) / sizeof(u64));
242
243const struct kvm_stats_header kvm_vm_stats_header = {
244 .name_size = KVM_STATS_NAME_SIZE,
245 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
246 .id_offset = sizeof(struct kvm_stats_header),
247 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
248 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
249 sizeof(kvm_vm_stats_desc),
250};
251
252const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
253 KVM_GENERIC_VCPU_STATS(),
254 STATS_DESC_COUNTER(VCPU, pf_fixed),
255 STATS_DESC_COUNTER(VCPU, pf_guest),
256 STATS_DESC_COUNTER(VCPU, tlb_flush),
257 STATS_DESC_COUNTER(VCPU, invlpg),
258 STATS_DESC_COUNTER(VCPU, exits),
259 STATS_DESC_COUNTER(VCPU, io_exits),
260 STATS_DESC_COUNTER(VCPU, mmio_exits),
261 STATS_DESC_COUNTER(VCPU, signal_exits),
262 STATS_DESC_COUNTER(VCPU, irq_window_exits),
263 STATS_DESC_COUNTER(VCPU, nmi_window_exits),
264 STATS_DESC_COUNTER(VCPU, l1d_flush),
265 STATS_DESC_COUNTER(VCPU, halt_exits),
266 STATS_DESC_COUNTER(VCPU, request_irq_exits),
267 STATS_DESC_COUNTER(VCPU, irq_exits),
268 STATS_DESC_COUNTER(VCPU, host_state_reload),
269 STATS_DESC_COUNTER(VCPU, fpu_reload),
270 STATS_DESC_COUNTER(VCPU, insn_emulation),
271 STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
272 STATS_DESC_COUNTER(VCPU, hypercalls),
273 STATS_DESC_COUNTER(VCPU, irq_injections),
274 STATS_DESC_COUNTER(VCPU, nmi_injections),
275 STATS_DESC_COUNTER(VCPU, req_event),
276 STATS_DESC_COUNTER(VCPU, nested_run),
277 STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
278 STATS_DESC_COUNTER(VCPU, directed_yield_successful),
279 STATS_DESC_ICOUNTER(VCPU, guest_mode)
280};
281static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
282 sizeof(struct kvm_vcpu_stat) / sizeof(u64));
283
284const struct kvm_stats_header kvm_vcpu_stats_header = {
285 .name_size = KVM_STATS_NAME_SIZE,
286 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
287 .id_offset = sizeof(struct kvm_stats_header),
288 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
289 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
290 sizeof(kvm_vcpu_stats_desc),
291};
292
293u64 __read_mostly host_xcr0;
294u64 __read_mostly supported_xcr0;
295EXPORT_SYMBOL_GPL(supported_xcr0);
296
297static struct kmem_cache *x86_fpu_cache;
298
299static struct kmem_cache *x86_emulator_cache;
300
301
302
303
304
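/*
 * Handle a read or write of an MSR that KVM does not implement: if
 * "ignore_msrs" is set the access is (optionally) logged and reported back as
 * handled, otherwise the caller is told to raise #GP in the guest.
 */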
305static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
306{
307 const char *op = write ? "wrmsr" : "rdmsr";
308
309 if (ignore_msrs) {
310 if (report_ignored_msrs)
311 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
312 op, msr, data);
313
314 return true;
315 } else {
316 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
317 op, msr, data);
318 return false;
319 }
320}
321
322static struct kmem_cache *kvm_alloc_emulator_cache(void)
323{
324 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
325 unsigned int size = sizeof(struct x86_emulate_ctxt);
326
327 return kmem_cache_create_usercopy("x86_emulator", size,
328 __alignof__(struct x86_emulate_ctxt),
329 SLAB_ACCOUNT, useroffset,
330 size - useroffset, NULL);
331}
332
333static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
334
335static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
336{
337 int i;
338 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
339 vcpu->arch.apf.gfns[i] = ~0;
340}
341
342static void kvm_on_user_return(struct user_return_notifier *urn)
343{
344 unsigned slot;
345 struct kvm_user_return_msrs *msrs
346 = container_of(urn, struct kvm_user_return_msrs, urn);
347 struct kvm_user_return_msr_values *values;
348 unsigned long flags;
349
350
351
352
353
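	/*
	 * Interrupts are disabled here because this notifier can race with
	 * the per-CPU hardware-disable path, which also restores the host
	 * MSR values and unregisters the notifier.
	 */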
354 local_irq_save(flags);
355 if (msrs->registered) {
356 msrs->registered = false;
357 user_return_notifier_unregister(urn);
358 }
359 local_irq_restore(flags);
360 for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
361 values = &msrs->values[slot];
362 if (values->host != values->curr) {
363 wrmsrl(kvm_uret_msrs_list[slot], values->host);
364 values->curr = values->host;
365 }
366 }
367}
368
369static int kvm_probe_user_return_msr(u32 msr)
370{
371 u64 val;
372 int ret;
373
374 preempt_disable();
375 ret = rdmsrl_safe(msr, &val);
376 if (ret)
377 goto out;
378 ret = wrmsrl_safe(msr, val);
379out:
380 preempt_enable();
381 return ret;
382}
383
384int kvm_add_user_return_msr(u32 msr)
385{
386 BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
387
388 if (kvm_probe_user_return_msr(msr))
389 return -1;
390
391 kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
392 return kvm_nr_uret_msrs++;
393}
394EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
395
396int kvm_find_user_return_msr(u32 msr)
397{
398 int i;
399
400 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
401 if (kvm_uret_msrs_list[i] == msr)
402 return i;
403 }
404 return -1;
405}
406EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
407
408static void kvm_user_return_msr_cpu_online(void)
409{
410 unsigned int cpu = smp_processor_id();
411 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
412 u64 value;
413 int i;
414
415 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
416 rdmsrl_safe(kvm_uret_msrs_list[i], &value);
417 msrs->values[i].host = value;
418 msrs->values[i].curr = value;
419 }
420}
421
422int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
423{
424 unsigned int cpu = smp_processor_id();
425 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
426 int err;
427
428 value = (value & mask) | (msrs->values[slot].host & ~mask);
429 if (value == msrs->values[slot].curr)
430 return 0;
431 err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
432 if (err)
433 return 1;
434
435 msrs->values[slot].curr = value;
436 if (!msrs->registered) {
437 msrs->urn.on_user_return = kvm_on_user_return;
438 user_return_notifier_register(&msrs->urn);
439 msrs->registered = true;
440 }
441 return 0;
442}
443EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
444
445static void drop_user_return_notifiers(void)
446{
447 unsigned int cpu = smp_processor_id();
448 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
449
450 if (msrs->registered)
451 kvm_on_user_return(&msrs->urn);
452}
453
454u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
455{
456 return vcpu->arch.apic_base;
457}
458EXPORT_SYMBOL_GPL(kvm_get_apic_base);
459
460enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
461{
462 return kvm_apic_mode(kvm_get_apic_base(vcpu));
463}
464EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
465
466int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
467{
468 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
469 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
470 u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
471 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
472
473 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
474 return 1;
475 if (!msr_info->host_initiated) {
476 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
477 return 1;
478 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
479 return 1;
480 }
481
482 kvm_lapic_set_base(vcpu, msr_info->data);
483 kvm_recalculate_apic_map(vcpu->kvm);
484 return 0;
485}
486EXPORT_SYMBOL_GPL(kvm_set_apic_base);
487
488asmlinkage __visible noinstr void kvm_spurious_fault(void)
489{
490
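	/* A spurious fault is only tolerated while KVM is rebooting. */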
491 BUG_ON(!kvm_rebooting);
492}
493EXPORT_SYMBOL_GPL(kvm_spurious_fault);
494
495#define EXCPT_BENIGN 0
496#define EXCPT_CONTRIBUTORY 1
497#define EXCPT_PF 2
498
499static int exception_class(int vector)
500{
501 switch (vector) {
502 case PF_VECTOR:
503 return EXCPT_PF;
504 case DE_VECTOR:
505 case TS_VECTOR:
506 case NP_VECTOR:
507 case SS_VECTOR:
508 case GP_VECTOR:
509 return EXCPT_CONTRIBUTORY;
510 default:
511 break;
512 }
513 return EXCPT_BENIGN;
514}
515
516#define EXCPT_FAULT 0
517#define EXCPT_TRAP 1
518#define EXCPT_ABORT 2
519#define EXCPT_INTERRUPT 3
520
521static int exception_type(int vector)
522{
523 unsigned int mask;
524
525 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
526 return EXCPT_INTERRUPT;
527
528 mask = 1 << vector;
529
530
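	/*
	 * #DB, #BP and #OF are reported as traps, #DF and #MC as aborts, and
	 * everything else in vectors 0..31 as a fault.
	 */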
531 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
532 return EXCPT_TRAP;
533
534 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
535 return EXCPT_ABORT;
536
537
538 return EXCPT_FAULT;
539}
540
541void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
542{
543 unsigned nr = vcpu->arch.exception.nr;
544 bool has_payload = vcpu->arch.exception.has_payload;
545 unsigned long payload = vcpu->arch.exception.payload;
546
547 if (!has_payload)
548 return;
549
550 switch (nr) {
	case DB_VECTOR:
		/*
		 * Certain debug exceptions may clear bits 0-3; the remaining
		 * contents of the DR6 register are never cleared by the
		 * processor, so start from the current value with the trap
		 * bits cleared.
		 */
		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/*
		 * To reflect the #DB payload in guest DR6, three components
		 * need to be merged: the FIXED_1 bits, the active-low bits and
		 * the active-high bits (e.g. DR6_BD, DR6_BS and DR6_BT).
		 * DR6_ACTIVE_LOW contains the FIXED_1 and active-low bits:
		 * FIXED_1 bits must always be set, active-low bits are cleared
		 * when set in the payload, and active-high bits are set when
		 * set in the payload.  The payload uses the VMX "pending debug
		 * exceptions" encoding, in which the active-low bits are
		 * active-high, so they are flipped via the XOR below.
		 */
		vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
		vcpu->arch.dr6 |= payload;
		vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;

		/*
		 * Bit 12 (enabled breakpoint) is defined in the VMX "pending
		 * debug exceptions" field but is reserved and must be zero in
		 * DR6, so clear it unconditionally.
		 */
		vcpu->arch.dr6 &= ~BIT(12);
		break;
586 case PF_VECTOR:
587 vcpu->arch.cr2 = payload;
588 break;
589 }
590
591 vcpu->arch.exception.has_payload = false;
592 vcpu->arch.exception.payload = 0;
593}
594EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
595
596static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
597 unsigned nr, bool has_error, u32 error_code,
598 bool has_payload, unsigned long payload, bool reinject)
599{
600 u32 prev_nr;
601 int class1, class2;
602
603 kvm_make_request(KVM_REQ_EVENT, vcpu);
604
605 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
606 queue:
		if (reinject) {
			/*
			 * On VM-Entry, an exception can be pending if and only
			 * if event injection was blocked by nested_run_pending.
			 * In that case, however, vcpu_enter_guest() requests an
			 * immediate exit, and the guest shouldn't proceed far
			 * enough to need reinjection.
			 */
			WARN_ON_ONCE(vcpu->arch.exception.pending);
			vcpu->arch.exception.injected = true;
			if (WARN_ON_ONCE(has_payload)) {
				/*
				 * A reinjected event has already
				 * delivered its payload.
				 */
				has_payload = false;
				payload = 0;
			}
626 } else {
627 vcpu->arch.exception.pending = true;
628 vcpu->arch.exception.injected = false;
629 }
630 vcpu->arch.exception.has_error_code = has_error;
631 vcpu->arch.exception.nr = nr;
632 vcpu->arch.exception.error_code = error_code;
633 vcpu->arch.exception.has_payload = has_payload;
634 vcpu->arch.exception.payload = payload;
635 if (!is_guest_mode(vcpu))
636 kvm_deliver_exception_payload(vcpu);
637 return;
638 }
639
640
641 prev_nr = vcpu->arch.exception.nr;
642 if (prev_nr == DF_VECTOR) {
643
644 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
645 return;
646 }
647 class1 = exception_class(prev_nr);
648 class2 = exception_class(nr);
649 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
650 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
651
652
653
654
655
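		/*
		 * Promote to a double fault per the SDM's contributory
		 * exception rules.  Mark it pending (not injected) so that a
		 * nested hypervisor gets a chance to intercept it.
		 */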
656 vcpu->arch.exception.pending = true;
657 vcpu->arch.exception.injected = false;
658 vcpu->arch.exception.has_error_code = true;
659 vcpu->arch.exception.nr = DF_VECTOR;
660 vcpu->arch.exception.error_code = 0;
661 vcpu->arch.exception.has_payload = false;
662 vcpu->arch.exception.payload = 0;
663 } else
664
665
666
667 goto queue;
668}
669
670void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
671{
672 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
673}
674EXPORT_SYMBOL_GPL(kvm_queue_exception);
675
676void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
677{
678 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
679}
680EXPORT_SYMBOL_GPL(kvm_requeue_exception);
681
682void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
683 unsigned long payload)
684{
685 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
686}
687EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
688
689static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
690 u32 error_code, unsigned long payload)
691{
692 kvm_multiple_exception(vcpu, nr, true, error_code,
693 true, payload, false);
694}
695
696int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
697{
698 if (err)
699 kvm_inject_gp(vcpu, 0);
700 else
701 return kvm_skip_emulated_instruction(vcpu);
702
703 return 1;
704}
705EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
706
707void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
708{
709 ++vcpu->stat.pf_guest;
710 vcpu->arch.exception.nested_apf =
711 is_guest_mode(vcpu) && fault->async_page_fault;
712 if (vcpu->arch.exception.nested_apf) {
713 vcpu->arch.apf.nested_apf_token = fault->address;
714 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
715 } else {
716 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
717 fault->address);
718 }
719}
720EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
721
722bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
723 struct x86_exception *fault)
724{
725 struct kvm_mmu *fault_mmu;
726 WARN_ON_ONCE(fault->vector != PF_VECTOR);
727
728 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
729 vcpu->arch.walk_mmu;
730
731
732
733
734
735 if ((fault->error_code & PFERR_PRESENT_MASK) &&
736 !(fault->error_code & PFERR_RSVD_MASK))
737 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
738 fault_mmu->root_hpa);
739
740 fault_mmu->inject_page_fault(vcpu, fault);
741 return fault->nested_page_fault;
742}
743EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
744
745void kvm_inject_nmi(struct kvm_vcpu *vcpu)
746{
747 atomic_inc(&vcpu->arch.nmi_queued);
748 kvm_make_request(KVM_REQ_NMI, vcpu);
749}
750EXPORT_SYMBOL_GPL(kvm_inject_nmi);
751
752void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
753{
754 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
755}
756EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
757
758void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
759{
760 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
761}
762EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
763
764
765
766
767
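/*
 * Return true if the guest's CPL is <= @required_cpl, otherwise queue a
 * #GP(0) and return false.
 */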
768bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
769{
770 if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
771 return true;
772 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
773 return false;
774}
775EXPORT_SYMBOL_GPL(kvm_require_cpl);
776
777bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
778{
779 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
780 return true;
781
782 kvm_queue_exception(vcpu, UD_VECTOR);
783 return false;
784}
785EXPORT_SYMBOL_GPL(kvm_require_dr);
786
787
788
789
790
791
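/*
 * Read a page of guest memory through the given MMU: @ngfn is first
 * translated via mmu->translate_gpa() (e.g. for a nested guest) and the
 * resulting frame is then read with kvm_vcpu_read_guest_page().
 */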
792int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
793 gfn_t ngfn, void *data, int offset, int len,
794 u32 access)
795{
796 struct x86_exception exception;
797 gfn_t real_gfn;
798 gpa_t ngpa;
799
800 ngpa = gfn_to_gpa(ngfn);
801 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
802 if (real_gfn == UNMAPPED_GVA)
803 return -EFAULT;
804
805 real_gfn = gpa_to_gfn(real_gfn);
806
807 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
808}
809EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
810
811static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
812{
813 return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
814}
815
816
817
818
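/*
 * Load the PAE PDPTEs referenced by CR3.  Returns 1 if they are all valid,
 * 0 otherwise.
 */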
819int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
820{
821 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
822 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
823 int i;
824 int ret;
825 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
826
827 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
828 offset * sizeof(u64), sizeof(pdpte),
829 PFERR_USER_MASK|PFERR_WRITE_MASK);
830 if (ret < 0) {
831 ret = 0;
832 goto out;
833 }
834 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
835 if ((pdpte[i] & PT_PRESENT_MASK) &&
836 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
837 ret = 0;
838 goto out;
839 }
840 }
841 ret = 1;
842
843 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
844 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
845 vcpu->arch.pdptrs_from_userspace = false;
846
847out:
848
849 return ret;
850}
851EXPORT_SYMBOL_GPL(load_pdptrs);
852
853void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
854{
855 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
856 kvm_clear_async_pf_completion_queue(vcpu);
857 kvm_async_pf_hash_reset(vcpu);
858 }
859
860 if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
861 kvm_mmu_reset_context(vcpu);
862
863 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
864 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
865 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
866 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
867}
868EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
869
870int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
871{
872 unsigned long old_cr0 = kvm_read_cr0(vcpu);
873 unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
874
875 cr0 |= X86_CR0_ET;
876
877#ifdef CONFIG_X86_64
878 if (cr0 & 0xffffffff00000000UL)
879 return 1;
880#endif
881
882 cr0 &= ~CR0_RESERVED_BITS;
883
884 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
885 return 1;
886
887 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
888 return 1;
889
890#ifdef CONFIG_X86_64
891 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
892 (cr0 & X86_CR0_PG)) {
893 int cs_db, cs_l;
894
895 if (!is_pae(vcpu))
896 return 1;
897 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
898 if (cs_l)
899 return 1;
900 }
901#endif
902 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
903 is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
904 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
905 return 1;
906
907 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
908 return 1;
909
910 static_call(kvm_x86_set_cr0)(vcpu, cr0);
911
912 kvm_post_set_cr0(vcpu, old_cr0, cr0);
913
914 return 0;
915}
916EXPORT_SYMBOL_GPL(kvm_set_cr0);
917
918void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
919{
920 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
921}
922EXPORT_SYMBOL_GPL(kvm_lmsw);
923
924void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
925{
926 if (vcpu->arch.guest_state_protected)
927 return;
928
929 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
930
931 if (vcpu->arch.xcr0 != host_xcr0)
932 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
933
934 if (vcpu->arch.xsaves_enabled &&
935 vcpu->arch.ia32_xss != host_xss)
936 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
937 }
938
939 if (static_cpu_has(X86_FEATURE_PKU) &&
940 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
941 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
942 vcpu->arch.pkru != vcpu->arch.host_pkru)
943 write_pkru(vcpu->arch.pkru);
944}
945EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
946
947void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
948{
949 if (vcpu->arch.guest_state_protected)
950 return;
951
952 if (static_cpu_has(X86_FEATURE_PKU) &&
953 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
954 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
955 vcpu->arch.pkru = rdpkru();
956 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
957 write_pkru(vcpu->arch.host_pkru);
958 }
959
960 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
961
962 if (vcpu->arch.xcr0 != host_xcr0)
963 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
964
965 if (vcpu->arch.xsaves_enabled &&
966 vcpu->arch.ia32_xss != host_xss)
967 wrmsrl(MSR_IA32_XSS, host_xss);
968 }
969
970}
971EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
972
973static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
974{
975 u64 xcr0 = xcr;
976 u64 old_xcr0 = vcpu->arch.xcr0;
977 u64 valid_bits;
978
979
980 if (index != XCR_XFEATURE_ENABLED_MASK)
981 return 1;
982 if (!(xcr0 & XFEATURE_MASK_FP))
983 return 1;
984 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
985 return 1;
986
987
988
989
990
991
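	/*
	 * Do not allow the guest to set bits that we do not support saving;
	 * however, xcr0 bit 0 (x87) is always permitted, even if the emulated
	 * CPU does not advertise XSAVE.
	 */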
992 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
993 if (xcr0 & ~valid_bits)
994 return 1;
995
996 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
997 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
998 return 1;
999
1000 if (xcr0 & XFEATURE_MASK_AVX512) {
1001 if (!(xcr0 & XFEATURE_MASK_YMM))
1002 return 1;
1003 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
1004 return 1;
1005 }
1006 vcpu->arch.xcr0 = xcr0;
1007
1008 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
1009 kvm_update_cpuid_runtime(vcpu);
1010 return 0;
1011}
1012
1013int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
1014{
1015 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
1016 __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
1017 kvm_inject_gp(vcpu, 0);
1018 return 1;
1019 }
1020
1021 return kvm_skip_emulated_instruction(vcpu);
1022}
1023EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
1024
1025bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1026{
1027 if (cr4 & cr4_reserved_bits)
1028 return false;
1029
1030 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
1031 return false;
1032
1033 return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
1034}
1035EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
1036
1037void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
1038{
1039 if (((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS) ||
1040 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1041 kvm_mmu_reset_context(vcpu);
1042}
1043EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
1044
1045int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1046{
1047 unsigned long old_cr4 = kvm_read_cr4(vcpu);
1048 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
1049 X86_CR4_SMEP;
1050
1051 if (!kvm_is_valid_cr4(vcpu, cr4))
1052 return 1;
1053
1054 if (is_long_mode(vcpu)) {
1055 if (!(cr4 & X86_CR4_PAE))
1056 return 1;
1057 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
1058 return 1;
1059 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
1060 && ((cr4 ^ old_cr4) & pdptr_bits)
1061 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
1062 kvm_read_cr3(vcpu)))
1063 return 1;
1064
1065 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1066 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1067 return 1;
1068
1069
1070 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1071 return 1;
1072 }
1073
1074 static_call(kvm_x86_set_cr4)(vcpu, cr4);
1075
1076 kvm_post_set_cr4(vcpu, old_cr4, cr4);
1077
1078 return 0;
1079}
1080EXPORT_SYMBOL_GPL(kvm_set_cr4);
1081
1082static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
1083{
1084 struct kvm_mmu *mmu = vcpu->arch.mmu;
1085 unsigned long roots_to_free = 0;
1086 int i;
1087
1088
1089
1090
1091
1092
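	/*
	 * If the given PCID backs the active CR3, the current root must be
	 * resynced and its TLB entries flushed; any cached previous roots
	 * that use the PCID are freed below.
	 */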
1093 if (kvm_get_active_pcid(vcpu) == pcid) {
1094 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1095 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1096 }
1097
1098 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
1099 if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
1100 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
1101
1102 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
1103}
1104
1105int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1106{
1107 bool skip_tlb_flush = false;
1108 unsigned long pcid = 0;
1109#ifdef CONFIG_X86_64
1110 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1111
1112 if (pcid_enabled) {
1113 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1114 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1115 pcid = cr3 & X86_CR3_PCID_MASK;
1116 }
1117#endif
1118
1119
1120 if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
1121 goto handle_tlb_flush;
1122
1123
1124
1125
1126
1127
1128 if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
1129 return 1;
1130
1131 if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1132 return 1;
1133
1134 if (cr3 != kvm_read_cr3(vcpu))
1135 kvm_mmu_new_pgd(vcpu, cr3);
1136
1137 vcpu->arch.cr3 = cr3;
1138 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1139
1140handle_tlb_flush:
1141
1142
1143
1144
1145
1146
1147
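	/*
	 * A CR3 write that does not set the NOFLUSH bit must flush the
	 * indicated PCID (PCID 0 when PCIDs are disabled).
	 */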
1148 if (!skip_tlb_flush)
1149 kvm_invalidate_pcid(vcpu, pcid);
1150
1151 return 0;
1152}
1153EXPORT_SYMBOL_GPL(kvm_set_cr3);
1154
1155int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1156{
1157 if (cr8 & CR8_RESERVED_BITS)
1158 return 1;
1159 if (lapic_in_kernel(vcpu))
1160 kvm_lapic_set_tpr(vcpu, cr8);
1161 else
1162 vcpu->arch.cr8 = cr8;
1163 return 0;
1164}
1165EXPORT_SYMBOL_GPL(kvm_set_cr8);
1166
1167unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1168{
1169 if (lapic_in_kernel(vcpu))
1170 return kvm_lapic_get_cr8(vcpu);
1171 else
1172 return vcpu->arch.cr8;
1173}
1174EXPORT_SYMBOL_GPL(kvm_get_cr8);
1175
1176static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1177{
1178 int i;
1179
1180 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1181 for (i = 0; i < KVM_NR_DB_REGS; i++)
1182 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1183 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
1184 }
1185}
1186
1187void kvm_update_dr7(struct kvm_vcpu *vcpu)
1188{
1189 unsigned long dr7;
1190
1191 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1192 dr7 = vcpu->arch.guest_debug_dr7;
1193 else
1194 dr7 = vcpu->arch.dr7;
1195 static_call(kvm_x86_set_dr7)(vcpu, dr7);
1196 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1197 if (dr7 & DR7_BP_EN_MASK)
1198 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1199}
1200EXPORT_SYMBOL_GPL(kvm_update_dr7);
1201
1202static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1203{
1204 u64 fixed = DR6_FIXED_1;
1205
1206 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1207 fixed |= DR6_RTM;
1208
1209 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
1210 fixed |= DR6_BUS_LOCK;
1211 return fixed;
1212}
1213
1214int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1215{
1216 size_t size = ARRAY_SIZE(vcpu->arch.db);
1217
1218 switch (dr) {
1219 case 0 ... 3:
1220 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1221 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1222 vcpu->arch.eff_db[dr] = val;
1223 break;
1224 case 4:
1225 case 6:
1226 if (!kvm_dr6_valid(val))
1227 return 1;
1228 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1229 break;
1230 case 5:
1231 default:
1232 if (!kvm_dr7_valid(val))
1233 return 1;
1234 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1235 kvm_update_dr7(vcpu);
1236 break;
1237 }
1238
1239 return 0;
1240}
1241EXPORT_SYMBOL_GPL(kvm_set_dr);
1242
1243void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1244{
1245 size_t size = ARRAY_SIZE(vcpu->arch.db);
1246
1247 switch (dr) {
1248 case 0 ... 3:
1249 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1250 break;
1251 case 4:
1252 case 6:
1253 *val = vcpu->arch.dr6;
1254 break;
1255 case 5:
1256 default:
1257 *val = vcpu->arch.dr7;
1258 break;
1259 }
1260}
1261EXPORT_SYMBOL_GPL(kvm_get_dr);
1262
1263int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
1264{
1265 u32 ecx = kvm_rcx_read(vcpu);
1266 u64 data;
1267
1268 if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
1269 kvm_inject_gp(vcpu, 0);
1270 return 1;
1271 }
1272
1273 kvm_rax_write(vcpu, (u32)data);
1274 kvm_rdx_write(vcpu, data >> 32);
1275 return kvm_skip_emulated_instruction(vcpu);
1276}
1277EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
1278
/*
 * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
 * describe the MSRs exposed to userspace through KVM_GET_MSRS, KVM_SET_MSRS
 * and KVM_GET_MSR_INDEX_LIST.
 *
 * msrs_to_save is selected from msrs_to_save_all to reflect the capabilities
 * of the host CPU; this capability test skips MSRs that are KVM-specific.
 * Those go in emulated_msrs_all, whose filtering into emulated_msrs may
 * depend on host virtualization features rather than on host CPU features.
 */
static const u32 msrs_to_save_all[] = {
1292 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1293 MSR_STAR,
1294#ifdef CONFIG_X86_64
1295 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1296#endif
1297 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1298 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1299 MSR_IA32_SPEC_CTRL,
1300 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1301 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1302 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1303 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1304 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1305 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1306 MSR_IA32_UMWAIT_CONTROL,
1307
1308 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1309 MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
1310 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1311 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1312 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1313 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1314 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1315 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1316 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1317 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1318 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1319 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1320 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1321 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1322 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1323 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1324 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1325 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1326 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1327 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1328 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1329 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1330};
1331
1332static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1333static unsigned num_msrs_to_save;
1334
1335static const u32 emulated_msrs_all[] = {
1336 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1337 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1338 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1339 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1340 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1341 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1342 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1343 HV_X64_MSR_RESET,
1344 HV_X64_MSR_VP_INDEX,
1345 HV_X64_MSR_VP_RUNTIME,
1346 HV_X64_MSR_SCONTROL,
1347 HV_X64_MSR_STIMER0_CONFIG,
1348 HV_X64_MSR_VP_ASSIST_PAGE,
1349 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1350 HV_X64_MSR_TSC_EMULATION_STATUS,
1351 HV_X64_MSR_SYNDBG_OPTIONS,
1352 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1353 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1354 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1355
1356 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1357 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1358
1359 MSR_IA32_TSC_ADJUST,
1360 MSR_IA32_TSC_DEADLINE,
1361 MSR_IA32_ARCH_CAPABILITIES,
1362 MSR_IA32_PERF_CAPABILITIES,
1363 MSR_IA32_MISC_ENABLE,
1364 MSR_IA32_MCG_STATUS,
1365 MSR_IA32_MCG_CTL,
1366 MSR_IA32_MCG_EXT_CTL,
1367 MSR_IA32_SMBASE,
1368 MSR_SMI_COUNT,
1369 MSR_PLATFORM_INFO,
1370 MSR_MISC_FEATURES_ENABLES,
1371 MSR_AMD64_VIRT_SPEC_CTRL,
1372 MSR_IA32_POWER_CTL,
1373 MSR_IA32_UCODE_REV,
1374
1375
1376
1377
1378
1379
1380
1381
1382 MSR_IA32_VMX_BASIC,
1383 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1384 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1385 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1386 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1387 MSR_IA32_VMX_MISC,
1388 MSR_IA32_VMX_CR0_FIXED0,
1389 MSR_IA32_VMX_CR4_FIXED0,
1390 MSR_IA32_VMX_VMCS_ENUM,
1391 MSR_IA32_VMX_PROCBASED_CTLS2,
1392 MSR_IA32_VMX_EPT_VPID_CAP,
1393 MSR_IA32_VMX_VMFUNC,
1394
1395 MSR_K7_HWCR,
1396 MSR_KVM_POLL_CONTROL,
1397};
1398
1399static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1400static unsigned num_emulated_msrs;
1401
1402
1403
1404
1405
1406static const u32 msr_based_features_all[] = {
1407 MSR_IA32_VMX_BASIC,
1408 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1409 MSR_IA32_VMX_PINBASED_CTLS,
1410 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1411 MSR_IA32_VMX_PROCBASED_CTLS,
1412 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1413 MSR_IA32_VMX_EXIT_CTLS,
1414 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1415 MSR_IA32_VMX_ENTRY_CTLS,
1416 MSR_IA32_VMX_MISC,
1417 MSR_IA32_VMX_CR0_FIXED0,
1418 MSR_IA32_VMX_CR0_FIXED1,
1419 MSR_IA32_VMX_CR4_FIXED0,
1420 MSR_IA32_VMX_CR4_FIXED1,
1421 MSR_IA32_VMX_VMCS_ENUM,
1422 MSR_IA32_VMX_PROCBASED_CTLS2,
1423 MSR_IA32_VMX_EPT_VPID_CAP,
1424 MSR_IA32_VMX_VMFUNC,
1425
1426 MSR_F10H_DECFG,
1427 MSR_IA32_UCODE_REV,
1428 MSR_IA32_ARCH_CAPABILITIES,
1429 MSR_IA32_PERF_CAPABILITIES,
1430};
1431
1432static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1433static unsigned int num_msr_based_features;
1434
1435static u64 kvm_get_arch_capabilities(void)
1436{
1437 u64 data = 0;
1438
1439 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1440 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1441
1442
1443
1444
1445
1446
1447
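	/*
	 * Always advertise PSCHANGE_MC_NO: with nx_huge_pages enabled, KVM's
	 * shadow paging ensures a nested hypervisor runs with NX huge pages,
	 * and without it L1 is already exposed to ITLB multihit from its own
	 * guests regardless of what is advertised here.
	 */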
1448 data |= ARCH_CAP_PSCHANGE_MC_NO;
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
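	/*
	 * If the host flushes the L1D cache on VM-Entry (always or
	 * conditionally), a nested hypervisor can safely skip its own flush.
	 */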
1459 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1460 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1461
1462 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1463 data |= ARCH_CAP_RDCL_NO;
1464 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1465 data |= ARCH_CAP_SSB_NO;
1466 if (!boot_cpu_has_bug(X86_BUG_MDS))
1467 data |= ARCH_CAP_MDS_NO;
1468
1469 if (!boot_cpu_has(X86_FEATURE_RTM)) {
1470
1471
1472
1473
1474
1475
1476
1477 data &= ~ARCH_CAP_TAA_NO;
1478 } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
1479 data |= ARCH_CAP_TAA_NO;
1480 } else {
1481
1482
1483
1484
1485
1486 }
1487
1488 return data;
1489}
1490
1491static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1492{
1493 switch (msr->index) {
1494 case MSR_IA32_ARCH_CAPABILITIES:
1495 msr->data = kvm_get_arch_capabilities();
1496 break;
1497 case MSR_IA32_UCODE_REV:
1498 rdmsrl_safe(msr->index, &msr->data);
1499 break;
1500 default:
1501 return static_call(kvm_x86_get_msr_feature)(msr);
1502 }
1503 return 0;
1504}
1505
1506static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1507{
1508 struct kvm_msr_entry msr;
1509 int r;
1510
1511 msr.index = index;
1512 r = kvm_get_msr_feature(&msr);
1513
1514 if (r == KVM_MSR_RET_INVALID) {
1515
1516 *data = 0;
1517 if (kvm_msr_ignored_check(index, 0, false))
1518 r = 0;
1519 }
1520
1521 if (r)
1522 return r;
1523
1524 *data = msr.data;
1525
1526 return 0;
1527}
1528
1529static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1530{
1531 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1532 return false;
1533
1534 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1535 return false;
1536
1537 if (efer & (EFER_LME | EFER_LMA) &&
1538 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1539 return false;
1540
1541 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1542 return false;
1543
1544 return true;
1545
1546}
1547bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1548{
1549 if (efer & efer_reserved_bits)
1550 return false;
1551
1552 return __kvm_valid_efer(vcpu, efer);
1553}
1554EXPORT_SYMBOL_GPL(kvm_valid_efer);
1555
1556static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1557{
1558 u64 old_efer = vcpu->arch.efer;
1559 u64 efer = msr_info->data;
1560 int r;
1561
1562 if (efer & efer_reserved_bits)
1563 return 1;
1564
1565 if (!msr_info->host_initiated) {
1566 if (!__kvm_valid_efer(vcpu, efer))
1567 return 1;
1568
1569 if (is_paging(vcpu) &&
1570 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1571 return 1;
1572 }
1573
1574 efer &= ~EFER_LMA;
1575 efer |= vcpu->arch.efer & EFER_LMA;
1576
1577 r = static_call(kvm_x86_set_efer)(vcpu, efer);
1578 if (r) {
1579 WARN_ON(r > 0);
1580 return r;
1581 }
1582
1583
1584 if ((efer ^ old_efer) & EFER_NX)
1585 kvm_mmu_reset_context(vcpu);
1586
1587 return 0;
1588}
1589
1590void kvm_enable_efer_bits(u64 mask)
1591{
1592 efer_reserved_bits &= ~mask;
1593}
1594EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1595
1596bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1597{
1598 struct kvm_x86_msr_filter *msr_filter;
1599 struct msr_bitmap_range *ranges;
1600 struct kvm *kvm = vcpu->kvm;
1601 bool allowed;
1602 int idx;
1603 u32 i;
1604
1605
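	/* x2APIC MSRs do not support filtering. */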
1606 if (index >= 0x800 && index <= 0x8ff)
1607 return true;
1608
1609 idx = srcu_read_lock(&kvm->srcu);
1610
1611 msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
1612 if (!msr_filter) {
1613 allowed = true;
1614 goto out;
1615 }
1616
1617 allowed = msr_filter->default_allow;
1618 ranges = msr_filter->ranges;
1619
1620 for (i = 0; i < msr_filter->count; i++) {
1621 u32 start = ranges[i].base;
1622 u32 end = start + ranges[i].nmsrs;
1623 u32 flags = ranges[i].flags;
1624 unsigned long *bitmap = ranges[i].bitmap;
1625
1626 if ((index >= start) && (index < end) && (flags & type)) {
1627 allowed = !!test_bit(index - start, bitmap);
1628 break;
1629 }
1630 }
1631
1632out:
1633 srcu_read_unlock(&kvm->srcu, idx);
1634
1635 return allowed;
1636}
1637EXPORT_SYMBOL_GPL(kvm_msr_allowed);
1638
1639
1640
1641
1642
1643
1644
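/*
 * Write @data to the MSR specified by @index.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is true.  Returns 0 on success,
 * non-0 otherwise; assumes vcpu_load() was already called.
 */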
1645static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1646 bool host_initiated)
1647{
1648 struct msr_data msr;
1649
1650 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1651 return KVM_MSR_RET_FILTERED;
1652
1653 switch (index) {
1654 case MSR_FS_BASE:
1655 case MSR_GS_BASE:
1656 case MSR_KERNEL_GS_BASE:
1657 case MSR_CSTAR:
1658 case MSR_LSTAR:
1659 if (is_noncanonical_address(data, vcpu))
1660 return 1;
1661 break;
1662 case MSR_IA32_SYSENTER_EIP:
1663 case MSR_IA32_SYSENTER_ESP:
		/*
		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if a
		 * non-canonical address is written on Intel but not on AMD,
		 * which ignores the top 32 bits because it does not implement
		 * 64-bit SYSENTER.
		 *
		 * 64-bit code should hence be able to write a non-canonical
		 * value on AMD.  Making the address canonical ensures that
		 * VM-Entry does not fail on Intel after writing a
		 * non-canonical value, and that something deterministic
		 * happens if the guest invokes 64-bit SYSENTER.
		 */
		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
		break;
1678 case MSR_TSC_AUX:
1679 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1680 return 1;
1681
1682 if (!host_initiated &&
1683 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1684 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1685 return 1;
1686
		/*
		 * Per Intel's SDM, bits 63:32 of TSC_AUX are reserved, whereas
		 * current AMD CPUs simply ignore them and read them back as
		 * zero.  Enforce the reserved-bits check if and only if the
		 * guest CPU is Intel, and clear the bits in all other cases so
		 * that cross-vendor migration sees consistent behavior.
		 */
		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
			return 1;

		data = (u32)data;
		break;
1701 }
1702
1703 msr.data = data;
1704 msr.index = index;
1705 msr.host_initiated = host_initiated;
1706
1707 return static_call(kvm_x86_set_msr)(vcpu, &msr);
1708}
1709
1710static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1711 u32 index, u64 data, bool host_initiated)
1712{
1713 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1714
1715 if (ret == KVM_MSR_RET_INVALID)
1716 if (kvm_msr_ignored_check(index, data, true))
1717 ret = 0;
1718
1719 return ret;
1720}
1721
1722
1723
1724
1725
1726
1727
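/*
 * Read the MSR specified by @index into @data.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is true.  Returns 0 on success,
 * non-0 otherwise; assumes vcpu_load() was already called.
 */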
1728int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1729 bool host_initiated)
1730{
1731 struct msr_data msr;
1732 int ret;
1733
1734 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1735 return KVM_MSR_RET_FILTERED;
1736
1737 switch (index) {
1738 case MSR_TSC_AUX:
1739 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1740 return 1;
1741
1742 if (!host_initiated &&
1743 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1744 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1745 return 1;
1746 break;
1747 }
1748
1749 msr.index = index;
1750 msr.host_initiated = host_initiated;
1751
1752 ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
1753 if (!ret)
1754 *data = msr.data;
1755 return ret;
1756}
1757
1758static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1759 u32 index, u64 *data, bool host_initiated)
1760{
1761 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1762
1763 if (ret == KVM_MSR_RET_INVALID) {
1764
1765 *data = 0;
1766 if (kvm_msr_ignored_check(index, 0, false))
1767 ret = 0;
1768 }
1769
1770 return ret;
1771}
1772
1773int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1774{
1775 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1776}
1777EXPORT_SYMBOL_GPL(kvm_get_msr);
1778
1779int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1780{
1781 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1782}
1783EXPORT_SYMBOL_GPL(kvm_set_msr);
1784
1785static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1786{
1787 int err = vcpu->run->msr.error;
1788 if (!err) {
1789 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1790 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1791 }
1792
1793 return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
1794}
1795
1796static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
1797{
1798 return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
1799}
1800
1801static u64 kvm_msr_reason(int r)
1802{
1803 switch (r) {
1804 case KVM_MSR_RET_INVALID:
1805 return KVM_MSR_EXIT_REASON_UNKNOWN;
1806 case KVM_MSR_RET_FILTERED:
1807 return KVM_MSR_EXIT_REASON_FILTER;
1808 default:
1809 return KVM_MSR_EXIT_REASON_INVAL;
1810 }
1811}
1812
1813static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1814 u32 exit_reason, u64 data,
1815 int (*completion)(struct kvm_vcpu *vcpu),
1816 int r)
1817{
1818 u64 msr_reason = kvm_msr_reason(r);
1819
1820
1821 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1822 return 0;
1823
1824 vcpu->run->exit_reason = exit_reason;
1825 vcpu->run->msr.error = 0;
1826 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1827 vcpu->run->msr.reason = msr_reason;
1828 vcpu->run->msr.index = index;
1829 vcpu->run->msr.data = data;
1830 vcpu->arch.complete_userspace_io = completion;
1831
1832 return 1;
1833}
1834
1835static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
1836{
1837 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
1838 complete_emulated_rdmsr, r);
1839}
1840
1841static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
1842{
1843 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
1844 complete_emulated_wrmsr, r);
1845}
1846
1847int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1848{
1849 u32 ecx = kvm_rcx_read(vcpu);
1850 u64 data;
1851 int r;
1852
1853 r = kvm_get_msr(vcpu, ecx, &data);
1854
1855
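	/* MSR read failed?  See if userspace wants to handle it. */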
1856 if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
1857
1858 return 0;
1859 }
1860
1861 if (!r) {
1862 trace_kvm_msr_read(ecx, data);
1863
1864 kvm_rax_write(vcpu, data & -1u);
1865 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1866 } else {
1867 trace_kvm_msr_read_ex(ecx);
1868 }
1869
1870 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1871}
1872EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1873
1874int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1875{
1876 u32 ecx = kvm_rcx_read(vcpu);
1877 u64 data = kvm_read_edx_eax(vcpu);
1878 int r;
1879
1880 r = kvm_set_msr(vcpu, ecx, data);
1881
1882
1883 if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
1884
1885 return 0;
1886
1887
1888 if (r < 0)
1889 return r;
1890
1891 if (!r)
1892 trace_kvm_msr_write(ecx, data);
1893 else
1894 trace_kvm_msr_write_ex(ecx, data);
1895
1896 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1897}
1898EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1899
1900int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
1901{
1902 return kvm_skip_emulated_instruction(vcpu);
1903}
1904EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
1905
1906int kvm_emulate_invd(struct kvm_vcpu *vcpu)
1907{
1908
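	/* Treat an INVD instruction as a NOP and just skip it. */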
1909 return kvm_emulate_as_nop(vcpu);
1910}
1911EXPORT_SYMBOL_GPL(kvm_emulate_invd);
1912
1913int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
1914{
1915 pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
1916 return kvm_emulate_as_nop(vcpu);
1917}
1918EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
1919
1920int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
1921{
1922 kvm_queue_exception(vcpu, UD_VECTOR);
1923 return 1;
1924}
1925EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
1926
1927int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
1928{
1929 pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
1930 return kvm_emulate_as_nop(vcpu);
1931}
1932EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
1933
1934static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
1935{
1936 xfer_to_guest_mode_prepare();
1937 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
1938 xfer_to_guest_mode_work_pending();
1939}
1940
/*
 * Fast path for frequent, performance-sensitive WRMSR emulation such as
 * sending an IPI.  Handling the write early in the VM-Exit flow, while
 * interrupts are still disabled, avoids the expensive parts of transitioning
 * from guest to host (e.g. reacquiring KVM's SRCU lock), unlike the normal
 * MSR paths which run after interrupts are enabled on the host.
 */
static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
1949{
1950 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
1951 return 1;
1952
1953 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
1954 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
1955 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
1956 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
1957
1958 data &= ~(1 << 12);
1959 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
1960 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
1961 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
1962 trace_kvm_apic_write(APIC_ICR, (u32)data);
1963 return 0;
1964 }
1965
1966 return 1;
1967}
1968
1969static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
1970{
1971 if (!kvm_can_use_hv_timer(vcpu))
1972 return 1;
1973
1974 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1975 return 0;
1976}
1977
1978fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
1979{
1980 u32 msr = kvm_rcx_read(vcpu);
1981 u64 data;
1982 fastpath_t ret = EXIT_FASTPATH_NONE;
1983
1984 switch (msr) {
1985 case APIC_BASE_MSR + (APIC_ICR >> 4):
1986 data = kvm_read_edx_eax(vcpu);
1987 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
1988 kvm_skip_emulated_instruction(vcpu);
1989 ret = EXIT_FASTPATH_EXIT_HANDLED;
1990 }
1991 break;
1992 case MSR_IA32_TSC_DEADLINE:
1993 data = kvm_read_edx_eax(vcpu);
1994 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
1995 kvm_skip_emulated_instruction(vcpu);
1996 ret = EXIT_FASTPATH_REENTER_GUEST;
1997 }
1998 break;
1999 default:
2000 break;
2001 }
2002
2003 if (ret != EXIT_FASTPATH_NONE)
2004 trace_kvm_msr_write(msr, data);
2005
2006 return ret;
2007}
2008EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
2009
2010
2011
2012
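/*
 * Adapt kvm_{get,set}_msr_ignored_check() to msr_io()'s calling convention
 * for host-initiated (userspace) MSR accesses.
 */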
2013static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2014{
2015 return kvm_get_msr_ignored_check(vcpu, index, data, true);
2016}
2017
2018static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2019{
2020 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
2021}
2022
2023#ifdef CONFIG_X86_64
2024struct pvclock_clock {
2025 int vclock_mode;
2026 u64 cycle_last;
2027 u64 mask;
2028 u32 mult;
2029 u32 shift;
2030 u64 base_cycles;
2031 u64 offset;
2032};
2033
2034struct pvclock_gtod_data {
2035 seqcount_t seq;
2036
2037 struct pvclock_clock clock;
2038 struct pvclock_clock raw_clock;
2039
2040 ktime_t offs_boot;
2041 u64 wall_time_sec;
2042};
2043
2044static struct pvclock_gtod_data pvclock_gtod_data;
2045
2046static void update_pvclock_gtod(struct timekeeper *tk)
2047{
2048 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
2049
2050 write_seqcount_begin(&vdata->seq);
2051
2052
2053 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
2054 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
2055 vdata->clock.mask = tk->tkr_mono.mask;
2056 vdata->clock.mult = tk->tkr_mono.mult;
2057 vdata->clock.shift = tk->tkr_mono.shift;
2058 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
2059 vdata->clock.offset = tk->tkr_mono.base;
2060
2061 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
2062 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
2063 vdata->raw_clock.mask = tk->tkr_raw.mask;
2064 vdata->raw_clock.mult = tk->tkr_raw.mult;
2065 vdata->raw_clock.shift = tk->tkr_raw.shift;
2066 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
2067 vdata->raw_clock.offset = tk->tkr_raw.base;
2068
2069 vdata->wall_time_sec = tk->xtime_sec;
2070
2071 vdata->offs_boot = tk->offs_boot;
2072
2073 write_seqcount_end(&vdata->seq);
2074}
2075
2076static s64 get_kvmclock_base_ns(void)
2077{
2078
2079 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
2080}
2081#else
2082static s64 get_kvmclock_base_ns(void)
2083{
2084
2085 return ktime_get_boottime_ns();
2086}
2087#endif
2088
2089void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
2090{
2091 int version;
2092 int r;
2093 struct pvclock_wall_clock wc;
2094 u32 wc_sec_hi;
2095 u64 wall_nsec;
2096
2097 if (!wall_clock)
2098 return;
2099
2100 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
2101 if (r)
2102 return;
2103
2104 if (version & 1)
2105 ++version;
2106
2107 ++version;
2108
2109 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
2110 return;
2111
	/*
	 * The guest calculates current wall clock time by adding system time
	 * (updated by kvm_guest_time_update) to the wall clock specified
	 * here.  We do the reverse here.
	 */
	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);

2119 wc.nsec = do_div(wall_nsec, 1000000000);
2120 wc.sec = (u32)wall_nsec;
2121 wc.version = version;
2122
2123 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
2124
2125 if (sec_hi_ofs) {
2126 wc_sec_hi = wall_nsec >> 32;
2127 kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
2128 &wc_sec_hi, sizeof(wc_sec_hi));
2129 }
2130
2131 version++;
2132 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
2133}
2134
2135static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
2136 bool old_msr, bool host_initiated)
2137{
2138 struct kvm_arch *ka = &vcpu->kvm->arch;
2139
2140 if (vcpu->vcpu_id == 0 && !host_initiated) {
2141 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
2142 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2143
2144 ka->boot_vcpu_runs_old_kvmclock = old_msr;
2145 }
2146
2147 vcpu->arch.time = system_time;
2148 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2149
2150
2151 vcpu->arch.pv_time_enabled = false;
2152 if (!(system_time & 1))
2153 return;
2154
2155 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2156 &vcpu->arch.pv_time, system_time & ~1ULL,
2157 sizeof(struct pvclock_vcpu_time_info)))
2158 vcpu->arch.pv_time_enabled = true;
2159
2160 return;
2161}
2162
2163static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
2164{
2165 do_shl32_div32(dividend, divisor);
2166 return dividend;
2167}
2168
2169static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
2170 s8 *pshift, u32 *pmultiplier)
2171{
2172 uint64_t scaled64;
2173 int32_t shift = 0;
2174 uint64_t tps64;
2175 uint32_t tps32;
2176
2177 tps64 = base_hz;
2178 scaled64 = scaled_hz;
2179 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2180 tps64 >>= 1;
2181 shift--;
2182 }
2183
2184 tps32 = (uint32_t)tps64;
2185 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2186 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2187 scaled64 >>= 1;
2188 else
2189 tps32 <<= 1;
2190 shift++;
2191 }
2192
2193 *pshift = shift;
2194 *pmultiplier = div_frac(scaled64, tps32);
2195}
2196
2197#ifdef CONFIG_X86_64
2198static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2199#endif
2200
2201static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2202static unsigned long max_tsc_khz;
2203
2204static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2205{
2206 u64 v = (u64)khz * (1000000 + ppm);
2207 do_div(v, 1000000);
2208 return v;
2209}
2210
2211static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
2212
2213static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2214{
2215 u64 ratio;
2216
2217
2218 if (!scale) {
2219 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2220 return 0;
2221 }
2222
2223
2224 if (!kvm_has_tsc_control) {
2225 if (user_tsc_khz > tsc_khz) {
2226 vcpu->arch.tsc_catchup = 1;
2227 vcpu->arch.tsc_always_catchup = 1;
2228 return 0;
2229 } else {
2230 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2231 return -1;
2232 }
2233 }
2234
2235
2236 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
2237 user_tsc_khz, tsc_khz);
2238
2239 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
2240 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2241 user_tsc_khz);
2242 return -1;
2243 }
2244
2245 kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
2246 return 0;
2247}
2248
2249static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2250{
2251 u32 thresh_lo, thresh_hi;
2252 int use_scaling = 0;
2253
2254
2255 if (user_tsc_khz == 0) {
2256
2257 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2258 return -1;
2259 }
2260
2261
2262 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2263 &vcpu->arch.virtual_tsc_shift,
2264 &vcpu->arch.virtual_tsc_mult);
2265 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2266
2267
2268
2269
2270
2271
2272
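	/*
	 * Compute the acceptable variation in TSC rate and decide whether the
	 * requested rate is close enough to the hardware rate that no scaling
	 * or catchup compensation is needed.
	 */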
2273 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2274 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2275 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2276 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2277 use_scaling = 1;
2278 }
2279 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2280}
2281
2282static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2283{
2284 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2285 vcpu->arch.virtual_tsc_mult,
2286 vcpu->arch.virtual_tsc_shift);
2287 tsc += vcpu->arch.this_tsc_write;
2288 return tsc;
2289}
2290
2291static inline int gtod_is_based_on_tsc(int mode)
2292{
2293 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2294}
2295
2296static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2297{
2298#ifdef CONFIG_X86_64
2299 bool vcpus_matched;
2300 struct kvm_arch *ka = &vcpu->kvm->arch;
2301 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2302
2303 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2304 atomic_read(&vcpu->kvm->online_vcpus));
2305
2306
2307
2308
2309
2310
2311
2312
2313
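	/*
	 * Once the masterclock is enabled, always perform an update request
	 * to keep it current.  To enable the masterclock in the first place,
	 * the host clocksource must be TSC based and all vCPU TSCs must
	 * match; when both conditions hold, request a masterclock update.
	 */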
2314 if (ka->use_master_clock ||
2315 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2316 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2317
2318 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2319 atomic_read(&vcpu->kvm->online_vcpus),
2320 ka->use_master_clock, gtod->clock.vclock_mode);
2321#endif
2322}
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
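/*
 * Multiply tsc by a fixed point number represented by ratio.
 *
 * The most significant 64-N bits (mult) of ratio represent the
 * integral part of the fixed point number; the remaining N bits
 * (frac) represent the fractional part, ie. ratio represents a fixed
 * point number (mult + frac * 2^(-N)).
 *
 * N equals to kvm_tsc_scaling_ratio_frac_bits.
 */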
2334static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2335{
2336 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2337}
2338
2339u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio)
2340{
2341 u64 _tsc = tsc;
2342
2343 if (ratio != kvm_default_tsc_scaling_ratio)
2344 _tsc = __scale_tsc(ratio, tsc);
2345
2346 return _tsc;
2347}
2348EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2349
2350static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2351{
2352 u64 tsc;
2353
2354 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
2355
2356 return target_tsc - tsc;
2357}
2358
2359u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2360{
2361 return vcpu->arch.l1_tsc_offset +
2362 kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
2363}
2364EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2365
2366u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
2367{
2368 u64 nested_offset;
2369
2370 if (l2_multiplier == kvm_default_tsc_scaling_ratio)
2371 nested_offset = l1_offset;
2372 else
2373 nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
2374 kvm_tsc_scaling_ratio_frac_bits);
2375
2376 nested_offset += l2_offset;
2377 return nested_offset;
2378}
2379EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
2380
2381u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
2382{
2383 if (l2_multiplier != kvm_default_tsc_scaling_ratio)
2384 return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
2385 kvm_tsc_scaling_ratio_frac_bits);
2386
2387 return l1_multiplier;
2388}
2389EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
2390
2391static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
2392{
2393 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2394 vcpu->arch.l1_tsc_offset,
2395 l1_offset);
2396
2397 vcpu->arch.l1_tsc_offset = l1_offset;
2398
2399
2400
2401
2402
2403
2404 if (is_guest_mode(vcpu))
2405 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2406 l1_offset,
2407 static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
2408 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2409 else
2410 vcpu->arch.tsc_offset = l1_offset;
2411
2412 static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
2413}
2414
2415static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
2416{
2417 vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
2418
2419
2420 if (is_guest_mode(vcpu))
2421 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2422 l1_multiplier,
2423 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2424 else
2425 vcpu->arch.tsc_scaling_ratio = l1_multiplier;
2426
2427 if (kvm_has_tsc_control)
2428 static_call(kvm_x86_write_tsc_multiplier)(
2429 vcpu, vcpu->arch.tsc_scaling_ratio);
2430}
2431
2432static inline bool kvm_check_tsc_unstable(void)
2433{
2434#ifdef CONFIG_X86_64
2435
2436
2437
2438
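	/*
	 * The TSC is marked unstable when running on Hyper-V, but the
	 * Hyper-V reference TSC page is still a usable clocksource, so do
	 * not treat that case as an unstable TSC.
	 */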
2439 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2440 return false;
2441#endif
2442 return check_tsc_unstable();
2443}
2444
2445static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2446{
2447 struct kvm *kvm = vcpu->kvm;
2448 u64 offset, ns, elapsed;
2449 unsigned long flags;
2450 bool matched;
2451 bool already_matched;
2452 bool synchronizing = false;
2453
2454 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2455 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2456 ns = get_kvmclock_base_ns();
2457 elapsed = ns - kvm->arch.last_tsc_nsec;
2458
2459 if (vcpu->arch.virtual_tsc_khz) {
2460 if (data == 0) {
2461
2462
2463
2464
2465
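			/*
			 * Detection of vCPU initialization: a TSC write of
			 * zero must be synchronized with the other vCPUs,
			 * which in particular keeps kvmclock stable across
			 * CPU hotplug.
			 */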
2466 synchronizing = true;
2467 } else {
2468 u64 tsc_exp = kvm->arch.last_tsc_write +
2469 nsec_to_cycles(vcpu, elapsed);
2470 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2471
2472
2473
2474
2475
2476 synchronizing = data < tsc_exp + tsc_hz &&
2477 data + tsc_hz > tsc_exp;
2478 }
2479 }
2480
2481
2482
2483
2484
2485
2486
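	/*
	 * If this write falls within the synchronization window and the
	 * frequency matches the previous write, treat it as an attempt to
	 * synchronize with the other vCPUs: reuse the existing offset on a
	 * stable host TSC, otherwise compensate for the elapsed time.
	 */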
2487 if (synchronizing &&
2488 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2489 if (!kvm_check_tsc_unstable()) {
2490 offset = kvm->arch.cur_tsc_offset;
2491 } else {
2492 u64 delta = nsec_to_cycles(vcpu, elapsed);
2493 data += delta;
2494 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2495 }
2496 matched = true;
2497 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
2498 } else {
2499
2500
2501
2502
2503
2504
2505
2506
2507
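		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc().
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */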
2508 kvm->arch.cur_tsc_generation++;
2509 kvm->arch.cur_tsc_nsec = ns;
2510 kvm->arch.cur_tsc_write = data;
2511 kvm->arch.cur_tsc_offset = offset;
2512 matched = false;
2513 }
2514
2515
2516
2517
2518
2519 kvm->arch.last_tsc_nsec = ns;
2520 kvm->arch.last_tsc_write = data;
2521 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2522
2523 vcpu->arch.last_guest_tsc = data;
2524
2525
2526 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2527 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2528 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2529
2530 kvm_vcpu_write_tsc_offset(vcpu, offset);
2531 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2532
2533 spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
2534 if (!matched) {
2535 kvm->arch.nr_vcpus_matched_tsc = 0;
2536 } else if (!already_matched) {
2537 kvm->arch.nr_vcpus_matched_tsc++;
2538 }
2539
2540 kvm_track_tsc_matching(vcpu);
2541 spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
2542}
2543
2544static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2545 s64 adjustment)
2546{
2547 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2548 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2549}
2550
2551static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2552{
2553 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2554 WARN_ON(adjustment < 0);
2555 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment,
2556 vcpu->arch.l1_tsc_scaling_ratio);
2557 adjust_tsc_offset_guest(vcpu, adjustment);
2558}
2559
2560#ifdef CONFIG_X86_64
2561
2562static u64 read_tsc(void)
2563{
2564 u64 ret = (u64)rdtsc_ordered();
2565 u64 last = pvclock_gtod_data.clock.cycle_last;
2566
2567 if (likely(ret >= last))
2568 return ret;
2569
2570
2571
2572
2573
2574
2575
2576
2577
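	/*
	 * The TSC read went backwards relative to cycle_last (possible when
	 * TSCs are not perfectly synchronized across CPUs); clamp to
	 * cycle_last to keep the clock monotonic.  The empty asm forces the
	 * compiler to emit a branch rather than a cmov for the likely path.
	 */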
2578 asm volatile ("");
2579 return last;
2580}
2581
2582static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2583 int *mode)
2584{
2585 long v;
2586 u64 tsc_pg_val;
2587
2588 switch (clock->vclock_mode) {
2589 case VDSO_CLOCKMODE_HVCLOCK:
2590 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2591 tsc_timestamp);
2592 if (tsc_pg_val != U64_MAX) {
2593
2594 *mode = VDSO_CLOCKMODE_HVCLOCK;
2595 v = (tsc_pg_val - clock->cycle_last) &
2596 clock->mask;
2597 } else {
2598
2599 *mode = VDSO_CLOCKMODE_NONE;
2600 }
2601 break;
2602 case VDSO_CLOCKMODE_TSC:
2603 *mode = VDSO_CLOCKMODE_TSC;
2604 *tsc_timestamp = read_tsc();
2605 v = (*tsc_timestamp - clock->cycle_last) &
2606 clock->mask;
2607 break;
2608 default:
2609 *mode = VDSO_CLOCKMODE_NONE;
2610 }
2611
2612 if (*mode == VDSO_CLOCKMODE_NONE)
2613 *tsc_timestamp = v = 0;
2614
2615 return v * clock->mult;
2616}
2617
2618static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2619{
2620 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2621 unsigned long seq;
2622 int mode;
2623 u64 ns;
2624
	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->raw_clock.base_cycles;
		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
		ns >>= gtod->raw_clock.shift;
		ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2632 *t = ns;
2633
2634 return mode;
2635}
2636
2637static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2638{
2639 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2640 unsigned long seq;
2641 int mode;
2642 u64 ns;
2643
	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->clock.base_cycles;
		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2651
2652 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2653 ts->tv_nsec = ns;
2654
2655 return mode;
2656}
2657
2658
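/*
 * Return the current kvmclock base time (raw monotonic plus boot offset)
 * together with the TSC value it was derived from; fails unless the host
 * clocksource is TSC based.
 */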
2659static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2660{
2661
2662 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2663 return false;
2664
2665 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2666 tsc_timestamp));
2667}
2668
2669
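/*
 * Return the current host CLOCK_REALTIME together with the TSC value it was
 * derived from; fails unless the host clocksource is TSC based.
 */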
2670static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2671 u64 *tsc_timestamp)
2672{
2673
2674 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2675 return false;
2676
2677 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2678}
2679#endif
2680
2681
2721
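/*
 * The masterclock is a single (master_kernel_ns, master_cycle_now) snapshot
 * shared by all vCPUs of a VM.  Deriving every vCPU's kvmclock from the same
 * snapshot prevents guests from ever observing two different
 * system_timestamp/tsc_timestamp pairs at once, which could otherwise make
 * the paravirtual clock appear to go backwards across vCPUs.  It is enabled
 * only when the host clocksource is TSC based and all vCPU TSCs are matched.
 */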
2722static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2723{
2724#ifdef CONFIG_X86_64
2725 struct kvm_arch *ka = &kvm->arch;
2726 int vclock_mode;
2727 bool host_tsc_clocksource, vcpus_matched;
2728
2729 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2730 atomic_read(&kvm->online_vcpus));
2731
2732
2733
2734
2735
2736 host_tsc_clocksource = kvm_get_time_and_clockread(
2737 &ka->master_kernel_ns,
2738 &ka->master_cycle_now);
2739
2740 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2741 && !ka->backwards_tsc_observed
2742 && !ka->boot_vcpu_runs_old_kvmclock;
2743
2744 if (ka->use_master_clock)
2745 atomic_set(&kvm_guest_has_master_clock, 1);
2746
2747 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2748 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2749 vcpus_matched);
2750#endif
2751}
2752
2753void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2754{
2755 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2756}
2757
2758static void kvm_gen_update_masterclock(struct kvm *kvm)
2759{
2760#ifdef CONFIG_X86_64
2761 int i;
2762 struct kvm_vcpu *vcpu;
2763 struct kvm_arch *ka = &kvm->arch;
2764 unsigned long flags;
2765
2766 kvm_hv_invalidate_tsc_page(kvm);
2767
2768 kvm_make_mclock_inprogress_request(kvm);
2769
2770
2771 spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2772 pvclock_update_vm_gtod_copy(kvm);
2773 spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2774
2775 kvm_for_each_vcpu(i, vcpu, kvm)
2776 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2777
2778
2779 kvm_for_each_vcpu(i, vcpu, kvm)
2780 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2781#endif
2782}
2783
2784u64 get_kvmclock_ns(struct kvm *kvm)
2785{
2786 struct kvm_arch *ka = &kvm->arch;
2787 struct pvclock_vcpu_time_info hv_clock;
2788 unsigned long flags;
2789 u64 ret;
2790
2791 spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2792 if (!ka->use_master_clock) {
2793 spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2794 return get_kvmclock_base_ns() + ka->kvmclock_offset;
2795 }
2796
2797 hv_clock.tsc_timestamp = ka->master_cycle_now;
2798 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2799 spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2800
2801
2802 get_cpu();
2803
2804 if (__this_cpu_read(cpu_tsc_khz)) {
2805 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2806 &hv_clock.tsc_shift,
2807 &hv_clock.tsc_to_system_mul);
2808 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2809 } else
2810 ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
2811
2812 put_cpu();
2813
2814 return ret;
2815}
2816
2817static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
2818 struct gfn_to_hva_cache *cache,
2819 unsigned int offset)
2820{
2821 struct kvm_vcpu_arch *vcpu = &v->arch;
2822 struct pvclock_vcpu_time_info guest_hv_clock;
2823
2824 if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
2825 &guest_hv_clock, offset, sizeof(guest_hv_clock))))
2826 return;
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
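	/*
	 * This vCPU is paused, but it is legal for a guest to read another
	 * vCPU's kvmclock, so the update must follow the pvclock protocol:
	 * the version is odd while the data is being modified and even once
	 * it is consistent, with write barriers between the steps.
	 */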
2842 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2843
2844 if (guest_hv_clock.version & 1)
2845 ++guest_hv_clock.version;
2846
2847 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2848 kvm_write_guest_offset_cached(v->kvm, cache,
2849 &vcpu->hv_clock, offset,
2850 sizeof(vcpu->hv_clock.version));
2851
2852 smp_wmb();
2853
2854
2855 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2856
2857 if (vcpu->pvclock_set_guest_stopped_request) {
2858 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2859 vcpu->pvclock_set_guest_stopped_request = false;
2860 }
2861
2862 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2863
2864 kvm_write_guest_offset_cached(v->kvm, cache,
2865 &vcpu->hv_clock, offset,
2866 sizeof(vcpu->hv_clock));
2867
2868 smp_wmb();
2869
2870 vcpu->hv_clock.version++;
2871 kvm_write_guest_offset_cached(v->kvm, cache,
2872 &vcpu->hv_clock, offset,
2873 sizeof(vcpu->hv_clock.version));
2874}
2875
2876static int kvm_guest_time_update(struct kvm_vcpu *v)
2877{
2878 unsigned long flags, tgt_tsc_khz;
2879 struct kvm_vcpu_arch *vcpu = &v->arch;
2880 struct kvm_arch *ka = &v->kvm->arch;
2881 s64 kernel_ns;
2882 u64 tsc_timestamp, host_tsc;
2883 u8 pvclock_flags;
2884 bool use_master_clock;
2885
2886 kernel_ns = 0;
2887 host_tsc = 0;
2888
2889
2890
2891
2892
2893 spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2894 use_master_clock = ka->use_master_clock;
2895 if (use_master_clock) {
2896 host_tsc = ka->master_cycle_now;
2897 kernel_ns = ka->master_kernel_ns;
2898 }
2899 spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2900
2901
2902 local_irq_save(flags);
2903 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2904 if (unlikely(tgt_tsc_khz == 0)) {
2905 local_irq_restore(flags);
2906 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2907 return 1;
2908 }
2909 if (!use_master_clock) {
2910 host_tsc = rdtsc();
2911 kernel_ns = get_kvmclock_base_ns();
2912 }
2913
2914 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
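	/*
	 * In tsc_catchup mode the guest TSC is advanced in software: if the
	 * TSC implied by elapsed kvmclock time is ahead of the scaled
	 * hardware value, bump the offset so the guest TSC never appears to
	 * fall behind kvmclock.
	 */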
2926 if (vcpu->tsc_catchup) {
2927 u64 tsc = compute_guest_tsc(v, kernel_ns);
2928 if (tsc > tsc_timestamp) {
2929 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2930 tsc_timestamp = tsc;
2931 }
2932 }
2933
2934 local_irq_restore(flags);
2935
2936
2937
2938 if (kvm_has_tsc_control)
2939 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz,
2940 v->arch.l1_tsc_scaling_ratio);
2941
2942 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2943 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2944 &vcpu->hv_clock.tsc_shift,
2945 &vcpu->hv_clock.tsc_to_system_mul);
2946 vcpu->hw_tsc_khz = tgt_tsc_khz;
2947 }
2948
2949 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2950 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2951 vcpu->last_guest_tsc = tsc_timestamp;
2952
2953
2954 pvclock_flags = 0;
2955 if (use_master_clock)
2956 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2957
2958 vcpu->hv_clock.flags = pvclock_flags;
2959
2960 if (vcpu->pv_time_enabled)
2961 kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
2962 if (vcpu->xen.vcpu_info_set)
2963 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
2964 offsetof(struct compat_vcpu_info, time));
2965 if (vcpu->xen.vcpu_time_info_set)
2966 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
2967 if (v == kvm_get_vcpu(v->kvm, 0))
2968 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2969 return 0;
2970}
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
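/*
 * kvmclock updates which are isolated to a given vcpu, such as
 * vcpu->cpu migration, should not allow system_timestamp from
 * the rest of the vcpus to remain static.  Otherwise ntp frequency
 * correction applies to one vcpu's system_timestamp but not
 * the others.
 *
 * So in those cases, request a kvmclock update for all vcpus.
 *
 * These requests are rate-limited, as they can considerably slow guests
 * that have a large number of vcpus; the time for a remote vcpu to update
 * its kvmclock is bound by the delay used below.
 */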
2986#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2987
2988static void kvmclock_update_fn(struct work_struct *work)
2989{
2990 int i;
2991 struct delayed_work *dwork = to_delayed_work(work);
2992 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2993 kvmclock_update_work);
2994 struct kvm *kvm = container_of(ka, struct kvm, arch);
2995 struct kvm_vcpu *vcpu;
2996
2997 kvm_for_each_vcpu(i, vcpu, kvm) {
2998 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2999 kvm_vcpu_kick(vcpu);
3000 }
3001}
3002
3003static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
3004{
3005 struct kvm *kvm = v->kvm;
3006
3007 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3008 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
3009 KVMCLOCK_UPDATE_DELAY);
3010}
3011
3012#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
3013
3014static void kvmclock_sync_fn(struct work_struct *work)
3015{
3016 struct delayed_work *dwork = to_delayed_work(work);
3017 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3018 kvmclock_sync_work);
3019 struct kvm *kvm = container_of(ka, struct kvm, arch);
3020
3021 if (!kvmclock_periodic_sync)
3022 return;
3023
3024 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
3025 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
3026 KVMCLOCK_SYNC_PERIOD);
3027}
3028
3029
3030
3031
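/*
 * On AMD and Hygon parts, the MCi_STATUS MSRs may be written with non-zero
 * values only when McStatusWrEn (bit 18 of MSR_K7_HWCR) is set; report
 * whether that is the case for this vCPU.
 */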
3032static bool can_set_mci_status(struct kvm_vcpu *vcpu)
3033{
3034
3035 if (guest_cpuid_is_amd_or_hygon(vcpu))
3036 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
3037
3038 return false;
3039}
3040
3041static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3042{
3043 u64 mcg_cap = vcpu->arch.mcg_cap;
3044 unsigned bank_num = mcg_cap & 0xff;
3045 u32 msr = msr_info->index;
3046 u64 data = msr_info->data;
3047
3048 switch (msr) {
3049 case MSR_IA32_MCG_STATUS:
3050 vcpu->arch.mcg_status = data;
3051 break;
3052 case MSR_IA32_MCG_CTL:
3053 if (!(mcg_cap & MCG_CTL_P) &&
3054 (data || !msr_info->host_initiated))
3055 return 1;
3056 if (data != 0 && data != ~(u64)0)
3057 return 1;
3058 vcpu->arch.mcg_ctl = data;
3059 break;
3060 default:
3061 if (msr >= MSR_IA32_MC0_CTL &&
3062 msr < MSR_IA32_MCx_CTL(bank_num)) {
3063 u32 offset = array_index_nospec(
3064 msr - MSR_IA32_MC0_CTL,
3065 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3066
3067
3068
3069
3070
3071
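			/*
			 * Only 0 or all 1s can be written to IA32_MCi_CTL;
			 * some Linux kernels clear bit 10 in bank 4 to work
			 * around a BIOS/GART TLB issue on AMD K8s, so ignore
			 * that bit here to avoid an uncaught #GP in the guest.
			 */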
3072 if ((offset & 0x3) == 0 &&
3073 data != 0 && (data | (1 << 10)) != ~(u64)0)
3074 return -1;
3075
3076
3077 if (!msr_info->host_initiated &&
3078 (offset & 0x3) == 1 && data != 0) {
3079 if (!can_set_mci_status(vcpu))
3080 return -1;
3081 }
3082
3083 vcpu->arch.mce_banks[offset] = data;
3084 break;
3085 }
3086 return 1;
3087 }
3088 return 0;
3089}
3090
3091static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
3092{
3093 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
3094
3095 return (vcpu->arch.apf.msr_en_val & mask) == mask;
3096}
3097
3098static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
3099{
3100 gpa_t gpa = data & ~0x3f;
3101
3102
3103 if (data & 0x30)
3104 return 1;
3105
3106 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
3107 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
3108 return 1;
3109
3110 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
3111 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
3112 return 1;
3113
3114 if (!lapic_in_kernel(vcpu))
3115 return data ? 1 : 0;
3116
3117 vcpu->arch.apf.msr_en_val = data;
3118
3119 if (!kvm_pv_async_pf_enabled(vcpu)) {
3120 kvm_clear_async_pf_completion_queue(vcpu);
3121 kvm_async_pf_hash_reset(vcpu);
3122 return 0;
3123 }
3124
3125 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
3126 sizeof(u64)))
3127 return 1;
3128
3129 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
3130 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
3131
3132 kvm_async_pf_wakeup_all(vcpu);
3133
3134 return 0;
3135}
3136
3137static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
3138{
3139
3140 if (data >> 8)
3141 return 1;
3142
3143 if (!lapic_in_kernel(vcpu))
3144 return 1;
3145
3146 vcpu->arch.apf.msr_int_val = data;
3147
3148 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
3149
3150 return 0;
3151}
3152
3153static void kvmclock_reset(struct kvm_vcpu *vcpu)
3154{
3155 vcpu->arch.pv_time_enabled = false;
3156 vcpu->arch.time = 0;
3157}
3158
3159static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
3160{
3161 ++vcpu->stat.tlb_flush;
3162 static_call(kvm_x86_tlb_flush_all)(vcpu);
3163}
3164
3165static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
3166{
3167 ++vcpu->stat.tlb_flush;
3168
3169 if (!tdp_enabled) {
3170
3171
3172
3173
3174
3175
3176
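		/*
		 * With shadow paging, a guest TLB flush must also resync the
		 * shadow page tables; unloading the MMU forces that resync
		 * (and the TLB flush) on the next load.
		 */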
3177 kvm_mmu_unload(vcpu);
3178 return;
3179 }
3180
3181 static_call(kvm_x86_tlb_flush_guest)(vcpu);
3182}
3183
3184static void record_steal_time(struct kvm_vcpu *vcpu)
3185{
3186 struct kvm_host_map map;
3187 struct kvm_steal_time *st;
3188
3189 if (kvm_xen_msr_enabled(vcpu->kvm)) {
3190 kvm_xen_runstate_set_running(vcpu);
3191 return;
3192 }
3193
3194 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3195 return;
3196
3197
3198 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
3199 &map, &vcpu->arch.st.cache, false))
3200 return;
3201
3202 st = map.hva +
3203 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
3204
3205
3206
3207
3208
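	/*
	 * If the guest asked for a TLB flush while this vCPU was preempted
	 * (paravirtual TLB flush), perform it now; this is what lets the
	 * guest skip the flush IPI to preempted vCPUs.
	 */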
3209 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3210 u8 st_preempted = xchg(&st->preempted, 0);
3211
3212 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3213 st_preempted & KVM_VCPU_FLUSH_TLB);
3214 if (st_preempted & KVM_VCPU_FLUSH_TLB)
3215 kvm_vcpu_flush_tlb_guest(vcpu);
3216 } else {
3217 st->preempted = 0;
3218 }
3219
3220 vcpu->arch.st.preempted = 0;
3221
3222 if (st->version & 1)
3223 st->version += 1;
3224
3225 st->version += 1;
3226
3227 smp_wmb();
3228
3229 st->steal += current->sched_info.run_delay -
3230 vcpu->arch.st.last_steal;
3231 vcpu->arch.st.last_steal = current->sched_info.run_delay;
3232
3233 smp_wmb();
3234
3235 st->version += 1;
3236
3237 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
3238}
3239
3240int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3241{
3242 bool pr = false;
3243 u32 msr = msr_info->index;
3244 u64 data = msr_info->data;
3245
3246 if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
3247 return kvm_xen_write_hypercall_page(vcpu, data);
3248
3249 switch (msr) {
3250 case MSR_AMD64_NB_CFG:
3251 case MSR_IA32_UCODE_WRITE:
3252 case MSR_VM_HSAVE_PA:
3253 case MSR_AMD64_PATCH_LOADER:
3254 case MSR_AMD64_BU_CFG2:
3255 case MSR_AMD64_DC_CFG:
3256 case MSR_F15H_EX_CFG:
3257 break;
3258
3259 case MSR_IA32_UCODE_REV:
3260 if (msr_info->host_initiated)
3261 vcpu->arch.microcode_version = data;
3262 break;
3263 case MSR_IA32_ARCH_CAPABILITIES:
3264 if (!msr_info->host_initiated)
3265 return 1;
3266 vcpu->arch.arch_capabilities = data;
3267 break;
3268 case MSR_IA32_PERF_CAPABILITIES: {
3269 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3270
3271 if (!msr_info->host_initiated)
3272 return 1;
3273 if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
3274 return 1;
3275 if (data & ~msr_ent.data)
3276 return 1;
3277
3278 vcpu->arch.perf_capabilities = data;
3279
3280 return 0;
3281 }
3282 case MSR_EFER:
3283 return set_efer(vcpu, msr_info);
3284 case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
		data &= ~(u64)0x8;	/* ignore TLB cache disable */
3288
3289
3290 if (data == BIT_ULL(18)) {
3291 vcpu->arch.msr_hwcr = data;
3292 } else if (data != 0) {
3293 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3294 data);
3295 return 1;
3296 }
3297 break;
3298 case MSR_FAM10H_MMIO_CONF_BASE:
3299 if (data != 0) {
3300 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3301 "0x%llx\n", data);
3302 return 1;
3303 }
3304 break;
3305 case 0x200 ... 0x2ff:
3306 return kvm_mtrr_set_msr(vcpu, msr, data);
3307 case MSR_IA32_APICBASE:
3308 return kvm_set_apic_base(vcpu, msr_info);
3309 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3310 return kvm_x2apic_msr_write(vcpu, msr, data);
3311 case MSR_IA32_TSC_DEADLINE:
3312 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3313 break;
3314 case MSR_IA32_TSC_ADJUST:
3315 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3316 if (!msr_info->host_initiated) {
3317 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3318 adjust_tsc_offset_guest(vcpu, adj);
3319 }
3320 vcpu->arch.ia32_tsc_adjust_msr = data;
3321 }
3322 break;
3323 case MSR_IA32_MISC_ENABLE:
3324 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3325 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3326 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3327 return 1;
3328 vcpu->arch.ia32_misc_enable_msr = data;
3329 kvm_update_cpuid_runtime(vcpu);
3330 } else {
3331 vcpu->arch.ia32_misc_enable_msr = data;
3332 }
3333 break;
3334 case MSR_IA32_SMBASE:
3335 if (!msr_info->host_initiated)
3336 return 1;
3337 vcpu->arch.smbase = data;
3338 break;
3339 case MSR_IA32_POWER_CTL:
3340 vcpu->arch.msr_ia32_power_ctl = data;
3341 break;
3342 case MSR_IA32_TSC:
3343 if (msr_info->host_initiated) {
3344 kvm_synchronize_tsc(vcpu, data);
3345 } else {
3346 u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3347 adjust_tsc_offset_guest(vcpu, adj);
3348 vcpu->arch.ia32_tsc_adjust_msr += adj;
3349 }
3350 break;
3351 case MSR_IA32_XSS:
3352 if (!msr_info->host_initiated &&
3353 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3354 return 1;
3355
3356
3357
3358
3359
3360 if (data & ~supported_xss)
3361 return 1;
3362 vcpu->arch.ia32_xss = data;
3363 break;
3364 case MSR_SMI_COUNT:
3365 if (!msr_info->host_initiated)
3366 return 1;
3367 vcpu->arch.smi_count = data;
3368 break;
3369 case MSR_KVM_WALL_CLOCK_NEW:
3370 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3371 return 1;
3372
3373 vcpu->kvm->arch.wall_clock = data;
3374 kvm_write_wall_clock(vcpu->kvm, data, 0);
3375 break;
3376 case MSR_KVM_WALL_CLOCK:
3377 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3378 return 1;
3379
3380 vcpu->kvm->arch.wall_clock = data;
3381 kvm_write_wall_clock(vcpu->kvm, data, 0);
3382 break;
3383 case MSR_KVM_SYSTEM_TIME_NEW:
3384 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3385 return 1;
3386
3387 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3388 break;
3389 case MSR_KVM_SYSTEM_TIME:
3390 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3391 return 1;
3392
3393 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3394 break;
3395 case MSR_KVM_ASYNC_PF_EN:
3396 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3397 return 1;
3398
3399 if (kvm_pv_enable_async_pf(vcpu, data))
3400 return 1;
3401 break;
3402 case MSR_KVM_ASYNC_PF_INT:
3403 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3404 return 1;
3405
3406 if (kvm_pv_enable_async_pf_int(vcpu, data))
3407 return 1;
3408 break;
3409 case MSR_KVM_ASYNC_PF_ACK:
3410 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3411 return 1;
3412 if (data & 0x1) {
3413 vcpu->arch.apf.pageready_pending = false;
3414 kvm_check_async_pf_completion(vcpu);
3415 }
3416 break;
3417 case MSR_KVM_STEAL_TIME:
3418 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3419 return 1;
3420
3421 if (unlikely(!sched_info_on()))
3422 return 1;
3423
3424 if (data & KVM_STEAL_RESERVED_MASK)
3425 return 1;
3426
3427 vcpu->arch.st.msr_val = data;
3428
3429 if (!(data & KVM_MSR_ENABLED))
3430 break;
3431
3432 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3433
3434 break;
3435 case MSR_KVM_PV_EOI_EN:
3436 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3437 return 1;
3438
3439 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
3440 return 1;
3441 break;
3442
3443 case MSR_KVM_POLL_CONTROL:
3444 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3445 return 1;
3446
3447
3448 if (data & (-1ULL << 1))
3449 return 1;
3450
3451 vcpu->arch.msr_kvm_poll_control = data;
3452 break;
3453
3454 case MSR_IA32_MCG_CTL:
3455 case MSR_IA32_MCG_STATUS:
3456 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3457 return set_msr_mce(vcpu, msr_info);
3458
3459 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3460 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3461 pr = true;
3462 fallthrough;
3463 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3464 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3465 if (kvm_pmu_is_valid_msr(vcpu, msr))
3466 return kvm_pmu_set_msr(vcpu, msr_info);
3467
3468 if (pr || data != 0)
3469 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3470 "0x%x data 0x%llx\n", msr, data);
3471 break;
3472 case MSR_K7_CLK_CTL:
3473
3474
3475
3476
3477
3478
3479
3480
3481 break;
3482 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3483 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3484 case HV_X64_MSR_SYNDBG_OPTIONS:
3485 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3486 case HV_X64_MSR_CRASH_CTL:
3487 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3488 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3489 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3490 case HV_X64_MSR_TSC_EMULATION_STATUS:
3491 return kvm_hv_set_msr_common(vcpu, msr, data,
3492 msr_info->host_initiated);
3493 case MSR_IA32_BBL_CR_CTL3:
3494
3495
3496
3497 if (report_ignored_msrs)
3498 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3499 msr, data);
3500 break;
3501 case MSR_AMD64_OSVW_ID_LENGTH:
3502 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3503 return 1;
3504 vcpu->arch.osvw.length = data;
3505 break;
3506 case MSR_AMD64_OSVW_STATUS:
3507 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3508 return 1;
3509 vcpu->arch.osvw.status = data;
3510 break;
3511 case MSR_PLATFORM_INFO:
3512 if (!msr_info->host_initiated ||
3513 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3514 cpuid_fault_enabled(vcpu)))
3515 return 1;
3516 vcpu->arch.msr_platform_info = data;
3517 break;
3518 case MSR_MISC_FEATURES_ENABLES:
3519 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3520 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3521 !supports_cpuid_fault(vcpu)))
3522 return 1;
3523 vcpu->arch.msr_misc_features_enables = data;
3524 break;
3525 default:
3526 if (kvm_pmu_is_valid_msr(vcpu, msr))
3527 return kvm_pmu_set_msr(vcpu, msr_info);
3528 return KVM_MSR_RET_INVALID;
3529 }
3530 return 0;
3531}
3532EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3533
3534static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3535{
3536 u64 data;
3537 u64 mcg_cap = vcpu->arch.mcg_cap;
3538 unsigned bank_num = mcg_cap & 0xff;
3539
3540 switch (msr) {
3541 case MSR_IA32_P5_MC_ADDR:
3542 case MSR_IA32_P5_MC_TYPE:
3543 data = 0;
3544 break;
3545 case MSR_IA32_MCG_CAP:
3546 data = vcpu->arch.mcg_cap;
3547 break;
3548 case MSR_IA32_MCG_CTL:
3549 if (!(mcg_cap & MCG_CTL_P) && !host)
3550 return 1;
3551 data = vcpu->arch.mcg_ctl;
3552 break;
3553 case MSR_IA32_MCG_STATUS:
3554 data = vcpu->arch.mcg_status;
3555 break;
3556 default:
3557 if (msr >= MSR_IA32_MC0_CTL &&
3558 msr < MSR_IA32_MCx_CTL(bank_num)) {
3559 u32 offset = array_index_nospec(
3560 msr - MSR_IA32_MC0_CTL,
3561 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3562
3563 data = vcpu->arch.mce_banks[offset];
3564 break;
3565 }
3566 return 1;
3567 }
3568 *pdata = data;
3569 return 0;
3570}
3571
3572int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3573{
3574 switch (msr_info->index) {
3575 case MSR_IA32_PLATFORM_ID:
3576 case MSR_IA32_EBL_CR_POWERON:
3577 case MSR_IA32_LASTBRANCHFROMIP:
3578 case MSR_IA32_LASTBRANCHTOIP:
3579 case MSR_IA32_LASTINTFROMIP:
3580 case MSR_IA32_LASTINTTOIP:
3581 case MSR_AMD64_SYSCFG:
3582 case MSR_K8_TSEG_ADDR:
3583 case MSR_K8_TSEG_MASK:
3584 case MSR_VM_HSAVE_PA:
3585 case MSR_K8_INT_PENDING_MSG:
3586 case MSR_AMD64_NB_CFG:
3587 case MSR_FAM10H_MMIO_CONF_BASE:
3588 case MSR_AMD64_BU_CFG2:
3589 case MSR_IA32_PERF_CTL:
3590 case MSR_AMD64_DC_CFG:
3591 case MSR_F15H_EX_CFG:
3592
3593
3594
3595
3596
3597
3598 case MSR_RAPL_POWER_UNIT:
3599 case MSR_PP0_ENERGY_STATUS:
3600 case MSR_PP1_ENERGY_STATUS:
3601 case MSR_PKG_ENERGY_STATUS:
3602 case MSR_DRAM_ENERGY_STATUS:
3603 msr_info->data = 0;
3604 break;
3605 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3606 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3607 return kvm_pmu_get_msr(vcpu, msr_info);
3608 if (!msr_info->host_initiated)
3609 return 1;
3610 msr_info->data = 0;
3611 break;
3612 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3613 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3614 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3615 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3616 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3617 return kvm_pmu_get_msr(vcpu, msr_info);
3618 msr_info->data = 0;
3619 break;
3620 case MSR_IA32_UCODE_REV:
3621 msr_info->data = vcpu->arch.microcode_version;
3622 break;
3623 case MSR_IA32_ARCH_CAPABILITIES:
3624 if (!msr_info->host_initiated &&
3625 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3626 return 1;
3627 msr_info->data = vcpu->arch.arch_capabilities;
3628 break;
3629 case MSR_IA32_PERF_CAPABILITIES:
3630 if (!msr_info->host_initiated &&
3631 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3632 return 1;
3633 msr_info->data = vcpu->arch.perf_capabilities;
3634 break;
3635 case MSR_IA32_POWER_CTL:
3636 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3637 break;
3638 case MSR_IA32_TSC: {
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648 u64 offset, ratio;
3649
3650 if (msr_info->host_initiated) {
3651 offset = vcpu->arch.l1_tsc_offset;
3652 ratio = vcpu->arch.l1_tsc_scaling_ratio;
3653 } else {
3654 offset = vcpu->arch.tsc_offset;
3655 ratio = vcpu->arch.tsc_scaling_ratio;
3656 }
3657
3658 msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset;
3659 break;
3660 }
3661 case MSR_MTRRcap:
3662 case 0x200 ... 0x2ff:
3663 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3664 case 0xcd:
3665 msr_info->data = 3;
3666 break;
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678 case MSR_EBC_FREQUENCY_ID:
3679 msr_info->data = 1 << 24;
3680 break;
3681 case MSR_IA32_APICBASE:
3682 msr_info->data = kvm_get_apic_base(vcpu);
3683 break;
3684 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3685 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3686 case MSR_IA32_TSC_DEADLINE:
3687 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3688 break;
3689 case MSR_IA32_TSC_ADJUST:
3690 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3691 break;
3692 case MSR_IA32_MISC_ENABLE:
3693 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3694 break;
3695 case MSR_IA32_SMBASE:
3696 if (!msr_info->host_initiated)
3697 return 1;
3698 msr_info->data = vcpu->arch.smbase;
3699 break;
3700 case MSR_SMI_COUNT:
3701 msr_info->data = vcpu->arch.smi_count;
3702 break;
3703 case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		msr_info->data = 1000ULL;
		/* CPU multiplier */
		msr_info->data |= (((uint64_t)4ULL) << 40);
3708 break;
3709 case MSR_EFER:
3710 msr_info->data = vcpu->arch.efer;
3711 break;
3712 case MSR_KVM_WALL_CLOCK:
3713 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3714 return 1;
3715
3716 msr_info->data = vcpu->kvm->arch.wall_clock;
3717 break;
3718 case MSR_KVM_WALL_CLOCK_NEW:
3719 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3720 return 1;
3721
3722 msr_info->data = vcpu->kvm->arch.wall_clock;
3723 break;
3724 case MSR_KVM_SYSTEM_TIME:
3725 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3726 return 1;
3727
3728 msr_info->data = vcpu->arch.time;
3729 break;
3730 case MSR_KVM_SYSTEM_TIME_NEW:
3731 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3732 return 1;
3733
3734 msr_info->data = vcpu->arch.time;
3735 break;
3736 case MSR_KVM_ASYNC_PF_EN:
3737 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3738 return 1;
3739
3740 msr_info->data = vcpu->arch.apf.msr_en_val;
3741 break;
3742 case MSR_KVM_ASYNC_PF_INT:
3743 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3744 return 1;
3745
3746 msr_info->data = vcpu->arch.apf.msr_int_val;
3747 break;
3748 case MSR_KVM_ASYNC_PF_ACK:
3749 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3750 return 1;
3751
3752 msr_info->data = 0;
3753 break;
3754 case MSR_KVM_STEAL_TIME:
3755 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3756 return 1;
3757
3758 msr_info->data = vcpu->arch.st.msr_val;
3759 break;
3760 case MSR_KVM_PV_EOI_EN:
3761 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3762 return 1;
3763
3764 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3765 break;
3766 case MSR_KVM_POLL_CONTROL:
3767 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3768 return 1;
3769
3770 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3771 break;
3772 case MSR_IA32_P5_MC_ADDR:
3773 case MSR_IA32_P5_MC_TYPE:
3774 case MSR_IA32_MCG_CAP:
3775 case MSR_IA32_MCG_CTL:
3776 case MSR_IA32_MCG_STATUS:
3777 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3778 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3779 msr_info->host_initiated);
3780 case MSR_IA32_XSS:
3781 if (!msr_info->host_initiated &&
3782 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3783 return 1;
3784 msr_info->data = vcpu->arch.ia32_xss;
3785 break;
3786 case MSR_K7_CLK_CTL:
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796 msr_info->data = 0x20000000;
3797 break;
3798 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3799 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3800 case HV_X64_MSR_SYNDBG_OPTIONS:
3801 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3802 case HV_X64_MSR_CRASH_CTL:
3803 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3804 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3805 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3806 case HV_X64_MSR_TSC_EMULATION_STATUS:
3807 return kvm_hv_get_msr_common(vcpu,
3808 msr_info->index, &msr_info->data,
3809 msr_info->host_initiated);
3810 case MSR_IA32_BBL_CR_CTL3:
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821 msr_info->data = 0xbe702111;
3822 break;
3823 case MSR_AMD64_OSVW_ID_LENGTH:
3824 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3825 return 1;
3826 msr_info->data = vcpu->arch.osvw.length;
3827 break;
3828 case MSR_AMD64_OSVW_STATUS:
3829 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3830 return 1;
3831 msr_info->data = vcpu->arch.osvw.status;
3832 break;
3833 case MSR_PLATFORM_INFO:
3834 if (!msr_info->host_initiated &&
3835 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3836 return 1;
3837 msr_info->data = vcpu->arch.msr_platform_info;
3838 break;
3839 case MSR_MISC_FEATURES_ENABLES:
3840 msr_info->data = vcpu->arch.msr_misc_features_enables;
3841 break;
3842 case MSR_K7_HWCR:
3843 msr_info->data = vcpu->arch.msr_hwcr;
3844 break;
3845 default:
3846 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3847 return kvm_pmu_get_msr(vcpu, msr_info);
3848 return KVM_MSR_RET_INVALID;
3849 }
3850 return 0;
3851}
3852EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3853
3854
3855
3856
3857
3858
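/*
 * Read or write a bunch of msrs.  All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */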
3859static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3860 struct kvm_msr_entry *entries,
3861 int (*do_msr)(struct kvm_vcpu *vcpu,
3862 unsigned index, u64 *data))
3863{
3864 int i;
3865
3866 for (i = 0; i < msrs->nmsrs; ++i)
3867 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3868 break;
3869
3870 return i;
3871}
3872
3873
3874
3875
3876
3877
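/*
 * Read or write a bunch of msrs.  Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */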
3878static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3879 int (*do_msr)(struct kvm_vcpu *vcpu,
3880 unsigned index, u64 *data),
3881 int writeback)
3882{
3883 struct kvm_msrs msrs;
3884 struct kvm_msr_entry *entries;
3885 int r, n;
3886 unsigned size;
3887
3888 r = -EFAULT;
3889 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3890 goto out;
3891
3892 r = -E2BIG;
3893 if (msrs.nmsrs >= MAX_IO_MSRS)
3894 goto out;
3895
3896 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3897 entries = memdup_user(user_msrs->entries, size);
3898 if (IS_ERR(entries)) {
3899 r = PTR_ERR(entries);
3900 goto out;
3901 }
3902
3903 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3904 if (r < 0)
3905 goto out_free;
3906
3907 r = -EFAULT;
3908 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3909 goto out_free;
3910
3911 r = n;
3912
3913out_free:
3914 kfree(entries);
3915out:
3916 return r;
3917}
3918
3919static inline bool kvm_can_mwait_in_guest(void)
3920{
3921 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3922 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3923 boot_cpu_has(X86_FEATURE_ARAT);
3924}
3925
3926static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
3927 struct kvm_cpuid2 __user *cpuid_arg)
3928{
3929 struct kvm_cpuid2 cpuid;
3930 int r;
3931
3932 r = -EFAULT;
3933 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3934 return r;
3935
3936 r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3937 if (r)
3938 return r;
3939
3940 r = -EFAULT;
3941 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3942 return r;
3943
3944 return 0;
3945}
3946
3947int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3948{
3949 int r = 0;
3950
3951 switch (ext) {
3952 case KVM_CAP_IRQCHIP:
3953 case KVM_CAP_HLT:
3954 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3955 case KVM_CAP_SET_TSS_ADDR:
3956 case KVM_CAP_EXT_CPUID:
3957 case KVM_CAP_EXT_EMUL_CPUID:
3958 case KVM_CAP_CLOCKSOURCE:
3959 case KVM_CAP_PIT:
3960 case KVM_CAP_NOP_IO_DELAY:
3961 case KVM_CAP_MP_STATE:
3962 case KVM_CAP_SYNC_MMU:
3963 case KVM_CAP_USER_NMI:
3964 case KVM_CAP_REINJECT_CONTROL:
3965 case KVM_CAP_IRQ_INJECT_STATUS:
3966 case KVM_CAP_IOEVENTFD:
3967 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3968 case KVM_CAP_PIT2:
3969 case KVM_CAP_PIT_STATE2:
3970 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3971 case KVM_CAP_VCPU_EVENTS:
3972 case KVM_CAP_HYPERV:
3973 case KVM_CAP_HYPERV_VAPIC:
3974 case KVM_CAP_HYPERV_SPIN:
3975 case KVM_CAP_HYPERV_SYNIC:
3976 case KVM_CAP_HYPERV_SYNIC2:
3977 case KVM_CAP_HYPERV_VP_INDEX:
3978 case KVM_CAP_HYPERV_EVENTFD:
3979 case KVM_CAP_HYPERV_TLBFLUSH:
3980 case KVM_CAP_HYPERV_SEND_IPI:
3981 case KVM_CAP_HYPERV_CPUID:
3982 case KVM_CAP_HYPERV_ENFORCE_CPUID:
3983 case KVM_CAP_SYS_HYPERV_CPUID:
3984 case KVM_CAP_PCI_SEGMENT:
3985 case KVM_CAP_DEBUGREGS:
3986 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3987 case KVM_CAP_XSAVE:
3988 case KVM_CAP_ASYNC_PF:
3989 case KVM_CAP_ASYNC_PF_INT:
3990 case KVM_CAP_GET_TSC_KHZ:
3991 case KVM_CAP_KVMCLOCK_CTRL:
3992 case KVM_CAP_READONLY_MEM:
3993 case KVM_CAP_HYPERV_TIME:
3994 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3995 case KVM_CAP_TSC_DEADLINE_TIMER:
3996 case KVM_CAP_DISABLE_QUIRKS:
3997 case KVM_CAP_SET_BOOT_CPU_ID:
3998 case KVM_CAP_SPLIT_IRQCHIP:
3999 case KVM_CAP_IMMEDIATE_EXIT:
4000 case KVM_CAP_PMU_EVENT_FILTER:
4001 case KVM_CAP_GET_MSR_FEATURES:
4002 case KVM_CAP_MSR_PLATFORM_INFO:
4003 case KVM_CAP_EXCEPTION_PAYLOAD:
4004 case KVM_CAP_SET_GUEST_DEBUG:
4005 case KVM_CAP_LAST_CPU:
4006 case KVM_CAP_X86_USER_SPACE_MSR:
4007 case KVM_CAP_X86_MSR_FILTER:
4008 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4009#ifdef CONFIG_X86_SGX_KVM
4010 case KVM_CAP_SGX_ATTRIBUTE:
4011#endif
4012 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
4013 case KVM_CAP_SREGS2:
4014 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
4015 r = 1;
4016 break;
4017 case KVM_CAP_EXIT_HYPERCALL:
4018 r = KVM_EXIT_HYPERCALL_VALID_MASK;
4019 break;
4020 case KVM_CAP_SET_GUEST_DEBUG2:
4021 return KVM_GUESTDBG_VALID_MASK;
4022#ifdef CONFIG_KVM_XEN
4023 case KVM_CAP_XEN_HVM:
4024 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
4025 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
4026 KVM_XEN_HVM_CONFIG_SHARED_INFO;
4027 if (sched_info_on())
4028 r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
4029 break;
4030#endif
4031 case KVM_CAP_SYNC_REGS:
4032 r = KVM_SYNC_X86_VALID_FIELDS;
4033 break;
4034 case KVM_CAP_ADJUST_CLOCK:
4035 r = KVM_CLOCK_TSC_STABLE;
4036 break;
4037 case KVM_CAP_X86_DISABLE_EXITS:
4038 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
4039 KVM_X86_DISABLE_EXITS_CSTATE;
		if (kvm_can_mwait_in_guest())
4041 r |= KVM_X86_DISABLE_EXITS_MWAIT;
4042 break;
4043 case KVM_CAP_X86_SMM:
4044
4045
4046
4047
4048
4049
4050
4051
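		/*
		 * Report SMM support only if the vendor module can emulate
		 * MSR_IA32_SMBASE, i.e. only if SMM can actually be
		 * virtualized for this VM.
		 */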
4052 r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
4053 break;
4054 case KVM_CAP_VAPIC:
4055 r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
4056 break;
4057 case KVM_CAP_NR_VCPUS:
4058 r = KVM_SOFT_MAX_VCPUS;
4059 break;
4060 case KVM_CAP_MAX_VCPUS:
4061 r = KVM_MAX_VCPUS;
4062 break;
4063 case KVM_CAP_MAX_VCPU_ID:
4064 r = KVM_MAX_VCPU_ID;
4065 break;
4066 case KVM_CAP_PV_MMU:
4067 r = 0;
4068 break;
4069 case KVM_CAP_MCE:
4070 r = KVM_MAX_MCE_BANKS;
4071 break;
4072 case KVM_CAP_XCRS:
4073 r = boot_cpu_has(X86_FEATURE_XSAVE);
4074 break;
4075 case KVM_CAP_TSC_CONTROL:
4076 r = kvm_has_tsc_control;
4077 break;
4078 case KVM_CAP_X2APIC_API:
4079 r = KVM_X2APIC_API_VALID_FLAGS;
4080 break;
4081 case KVM_CAP_NESTED_STATE:
4082 r = kvm_x86_ops.nested_ops->get_state ?
4083 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
4084 break;
4085 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4086 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
4087 break;
4088 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4089 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
4090 break;
4091 case KVM_CAP_SMALLER_MAXPHYADDR:
4092 r = (int) allow_smaller_maxphyaddr;
4093 break;
4094 case KVM_CAP_STEAL_TIME:
4095 r = sched_info_on();
4096 break;
4097 case KVM_CAP_X86_BUS_LOCK_EXIT:
4098 if (kvm_has_bus_lock_exit)
4099 r = KVM_BUS_LOCK_DETECTION_OFF |
4100 KVM_BUS_LOCK_DETECTION_EXIT;
4101 else
4102 r = 0;
4103 break;
4104 default:
4105 break;
4106 }
4107 return r;
4108
4109}
4110
4111long kvm_arch_dev_ioctl(struct file *filp,
4112 unsigned int ioctl, unsigned long arg)
4113{
4114 void __user *argp = (void __user *)arg;
4115 long r;
4116
4117 switch (ioctl) {
4118 case KVM_GET_MSR_INDEX_LIST: {
4119 struct kvm_msr_list __user *user_msr_list = argp;
4120 struct kvm_msr_list msr_list;
4121 unsigned n;
4122
4123 r = -EFAULT;
4124 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4125 goto out;
4126 n = msr_list.nmsrs;
4127 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
4128 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4129 goto out;
4130 r = -E2BIG;
4131 if (n < msr_list.nmsrs)
4132 goto out;
4133 r = -EFAULT;
4134 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
4135 num_msrs_to_save * sizeof(u32)))
4136 goto out;
4137 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
4138 &emulated_msrs,
4139 num_emulated_msrs * sizeof(u32)))
4140 goto out;
4141 r = 0;
4142 break;
4143 }
4144 case KVM_GET_SUPPORTED_CPUID:
4145 case KVM_GET_EMULATED_CPUID: {
4146 struct kvm_cpuid2 __user *cpuid_arg = argp;
4147 struct kvm_cpuid2 cpuid;
4148
4149 r = -EFAULT;
4150 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4151 goto out;
4152
4153 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
4154 ioctl);
4155 if (r)
4156 goto out;
4157
4158 r = -EFAULT;
4159 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4160 goto out;
4161 r = 0;
4162 break;
4163 }
4164 case KVM_X86_GET_MCE_CAP_SUPPORTED:
4165 r = -EFAULT;
4166 if (copy_to_user(argp, &kvm_mce_cap_supported,
4167 sizeof(kvm_mce_cap_supported)))
4168 goto out;
4169 r = 0;
4170 break;
4171 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
4172 struct kvm_msr_list __user *user_msr_list = argp;
4173 struct kvm_msr_list msr_list;
4174 unsigned int n;
4175
4176 r = -EFAULT;
4177 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4178 goto out;
4179 n = msr_list.nmsrs;
4180 msr_list.nmsrs = num_msr_based_features;
4181 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4182 goto out;
4183 r = -E2BIG;
4184 if (n < msr_list.nmsrs)
4185 goto out;
4186 r = -EFAULT;
4187 if (copy_to_user(user_msr_list->indices, &msr_based_features,
4188 num_msr_based_features * sizeof(u32)))
4189 goto out;
4190 r = 0;
4191 break;
4192 }
4193 case KVM_GET_MSRS:
4194 r = msr_io(NULL, argp, do_get_msr_feature, 1);
4195 break;
4196 case KVM_GET_SUPPORTED_HV_CPUID:
4197 r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
4198 break;
4199 default:
4200 r = -EINVAL;
4201 break;
4202 }
4203out:
4204 return r;
4205}
4206
4207static void wbinvd_ipi(void *garbage)
4208{
4209 wbinvd();
4210}
4211
4212static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
4213{
4214 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
4215}
4216
4217void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4218{
4219
4220 if (need_emulate_wbinvd(vcpu)) {
4221 if (static_call(kvm_x86_has_wbinvd_exit)())
4222 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4223 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
4224 smp_call_function_single(vcpu->cpu,
4225 wbinvd_ipi, NULL, 1);
4226 }
4227
4228 static_call(kvm_x86_vcpu_load)(vcpu, cpu);
4229
4230
4231 vcpu->arch.host_pkru = read_pkru();
4232
4233
4234 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
4235 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
4236 vcpu->arch.tsc_offset_adjustment = 0;
4237 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4238 }
4239
4240 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
4241 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
4242 rdtsc() - vcpu->arch.last_host_tsc;
4243 if (tsc_delta < 0)
4244 mark_tsc_unstable("KVM discovered backwards TSC");
4245
4246 if (kvm_check_tsc_unstable()) {
4247 u64 offset = kvm_compute_l1_tsc_offset(vcpu,
4248 vcpu->arch.last_guest_tsc);
4249 kvm_vcpu_write_tsc_offset(vcpu, offset);
4250 vcpu->arch.tsc_catchup = 1;
4251 }
4252
4253 if (kvm_lapic_hv_timer_in_use(vcpu))
4254 kvm_lapic_restart_hv_timer(vcpu);
4255
4256
4257
4258
4259
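		/*
		 * On a host with synchronized TSCs there is no need to update
		 * kvmclock on vCPU migration; otherwise ask for a global
		 * clock update.
		 */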
4260 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
4261 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
4262 if (vcpu->cpu != cpu)
4263 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
4264 vcpu->cpu = cpu;
4265 }
4266
4267 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
4268}
4269
4270static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
4271{
4272 struct kvm_host_map map;
4273 struct kvm_steal_time *st;
4274
4275 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
4276 return;
4277
4278 if (vcpu->arch.st.preempted)
4279 return;
4280
4281 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
4282 &vcpu->arch.st.cache, true))
4283 return;
4284
4285 st = map.hva +
4286 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
4287
4288 st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
4289
4290 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
4291}
4292
4293void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
4294{
4295 int idx;
4296
4297 if (vcpu->preempted && !vcpu->arch.guest_state_protected)
4298 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
4299
4300
4301
4302
4303
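	/*
	 * Take the srcu lock as memslots will be accessed to check the gfn
	 * cache generation against the memslots generation.
	 */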
4304 idx = srcu_read_lock(&vcpu->kvm->srcu);
4305 if (kvm_xen_msr_enabled(vcpu->kvm))
4306 kvm_xen_runstate_set_preempted(vcpu);
4307 else
4308 kvm_steal_time_set_preempted(vcpu);
4309 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4310
4311 static_call(kvm_x86_vcpu_put)(vcpu);
4312 vcpu->arch.last_host_tsc = rdtsc();
4313
4314
4315
4316
4317
4318 set_debugreg(0, 6);
4319}
4320
4321static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4322 struct kvm_lapic_state *s)
4323{
4324 if (vcpu->arch.apicv_active)
4325 static_call(kvm_x86_sync_pir_to_irr)(vcpu);
4326
4327 return kvm_apic_get_state(vcpu, s);
4328}
4329
4330static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4331 struct kvm_lapic_state *s)
4332{
4333 int r;
4334
4335 r = kvm_apic_set_state(vcpu, s);
4336 if (r)
4337 return r;
4338 update_cr8_intercept(vcpu);
4339
4340 return 0;
4341}
4342
4343static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4344{
4345
4346
4347
4348
4349
4350
4351 if (kvm_cpu_has_extint(vcpu))
4352 return false;
4353
4354
4355 return (!lapic_in_kernel(vcpu) ||
4356 kvm_apic_accept_pic_intr(vcpu));
4357}
4358
4359static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4360{
4361
4362
4363
4364
4365
4366
4367
4368 return (kvm_arch_interrupt_allowed(vcpu) &&
4369 kvm_cpu_accept_dm_intr(vcpu) &&
4370 !kvm_event_needs_reinjection(vcpu) &&
4371 !vcpu->arch.exception.pending);
4372}
4373
4374static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4375 struct kvm_interrupt *irq)
4376{
4377 if (irq->irq >= KVM_NR_INTERRUPTS)
4378 return -EINVAL;
4379
4380 if (!irqchip_in_kernel(vcpu->kvm)) {
4381 kvm_queue_interrupt(vcpu, irq->irq, false);
4382 kvm_make_request(KVM_REQ_EVENT, vcpu);
4383 return 0;
4384 }
4385
4386
4387
4388
4389
4390 if (pic_in_kernel(vcpu->kvm))
4391 return -ENXIO;
4392
4393 if (vcpu->arch.pending_external_vector != -1)
4394 return -EEXIST;
4395
4396 vcpu->arch.pending_external_vector = irq->irq;
4397 kvm_make_request(KVM_REQ_EVENT, vcpu);
4398 return 0;
4399}
4400
4401static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4402{
4403 kvm_inject_nmi(vcpu);
4404
4405 return 0;
4406}
4407
4408static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4409{
4410 kvm_make_request(KVM_REQ_SMI, vcpu);
4411
4412 return 0;
4413}
4414
4415static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4416 struct kvm_tpr_access_ctl *tac)
4417{
4418 if (tac->flags)
4419 return -EINVAL;
4420 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4421 return 0;
4422}
4423
static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
					u64 mcg_cap)
{
	int r;
	unsigned bank_num = mcg_cap & 0xff, bank;

	r = -EINVAL;
	if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
		goto out;
	if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
		goto out;
	r = 0;
	vcpu->arch.mcg_cap = mcg_cap;
	/* Init IA32_MCG_CTL to all 1s */
	if (mcg_cap & MCG_CTL_P)
		vcpu->arch.mcg_ctl = ~(u64)0;
	/* Init IA32_MCi_CTL to all 1s */
	for (bank = 0; bank < bank_num; bank++)
		vcpu->arch.mce_banks[bank*4] = ~(u64)0;

	static_call(kvm_x86_setup_mce)(vcpu);
out:
	return r;
}

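/*
 * KVM_X86_SET_MCE: inject a machine-check event described by struct
 * kvm_x86_mce.  Uncorrected errors raise #MC in the guest (or a triple
 * fault if CR4.MCE is clear or another #MC is already in progress), while
 * corrected errors are only recorded in the emulated bank registers.
 */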
static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
				      struct kvm_x86_mce *mce)
{
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;
	u64 *banks = vcpu->arch.mce_banks;

	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
		return -EINVAL;
	/*
	 * If IA32_MCG_CTL is not all 1s, then uncorrected error
	 * reporting is disabled.
	 */
	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
	    vcpu->arch.mcg_ctl != ~(u64)0)
		return 0;
	banks += 4 * mce->bank;
	/*
	 * If IA32_MCi_CTL is not all 1s, then uncorrected error
	 * reporting is disabled for this bank.
	 */
	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
		return 0;
	if (mce->status & MCI_STATUS_UC) {
		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
		    !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
			return 0;
		}
		if (banks[1] & MCI_STATUS_VAL)
			mce->status |= MCI_STATUS_OVER;
		banks[2] = mce->addr;
		banks[3] = mce->misc;
		vcpu->arch.mcg_status = mce->mcg_status;
		banks[1] = mce->status;
		kvm_queue_exception(vcpu, MC_VECTOR);
	} else if (!(banks[1] & MCI_STATUS_VAL)
		   || !(banks[1] & MCI_STATUS_UC)) {
		if (banks[1] & MCI_STATUS_VAL)
			mce->status |= MCI_STATUS_OVER;
		banks[2] = mce->addr;
		banks[3] = mce->misc;
		banks[1] = mce->status;
	} else
		banks[1] |= MCI_STATUS_OVER;
	return 0;
}

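/*
 * KVM_GET_VCPU_EVENTS: report pending and injected exceptions, interrupts,
 * NMIs and SMM state to userspace.  Queued NMI/SMI requests are folded in
 * first so that the snapshot is self-consistent.
 */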
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
					       struct kvm_vcpu_events *events)
{
	process_nmi(vcpu);

	if (kvm_check_request(KVM_REQ_SMI, vcpu))
		process_smi(vcpu);

	/*
	 * In guest mode, payload delivery should be deferred so that the L1
	 * hypervisor can intercept #PF before CR2 is modified (or intercept
	 * #DB before DR6 is modified under nVMX).  Unless the per-VM
	 * capability KVM_CAP_EXCEPTION_PAYLOAD is set, we may not defer the
	 * payload delivery under KVM_GET_VCPU_EVENTS, so deliver it here.
	 * This is opportunistic: KVM_GET_VCPU_EVENTS is usually followed by
	 * KVM_SET_VCPU_EVENTS, but we cannot tell whether that is the case.
	 */
	if (!vcpu->kvm->arch.exception_payload_enabled &&
	    vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
		kvm_deliver_exception_payload(vcpu);

	/*
	 * The API doesn't provide the instruction length for software
	 * exceptions, so don't report them.  As long as the guest RIP
	 * isn't advanced, we should expect to encounter the exception
	 * again.
	 */
	if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
		events->exception.injected = 0;
		events->exception.pending = 0;
	} else {
		events->exception.injected = vcpu->arch.exception.injected;
		events->exception.pending = vcpu->arch.exception.pending;
		/*
		 * For ABI compatibility, deliberately conflate
		 * pending and injected exceptions when
		 * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
		 */
		if (!vcpu->kvm->arch.exception_payload_enabled)
			events->exception.injected |=
				vcpu->arch.exception.pending;
	}
	events->exception.nr = vcpu->arch.exception.nr;
	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
	events->exception.error_code = vcpu->arch.exception.error_code;
	events->exception_has_payload = vcpu->arch.exception.has_payload;
	events->exception_payload = vcpu->arch.exception.payload;

	events->interrupt.injected =
		vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
	events->interrupt.nr = vcpu->arch.interrupt.nr;
	events->interrupt.soft = 0;
	events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);

	events->nmi.injected = vcpu->arch.nmi_injected;
	events->nmi.pending = vcpu->arch.nmi_pending != 0;
	events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
	events->nmi.pad = 0;

	events->sipi_vector = 0; /* never valid when reporting to user space */

	events->smi.smm = is_smm(vcpu);
	events->smi.pending = vcpu->arch.smi_pending;
	events->smi.smm_inside_nmi =
		!!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
	events->smi.latched_init = kvm_lapic_latched_init(vcpu);

	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
			 | KVM_VCPUEVENT_VALID_SHADOW
			 | KVM_VCPUEVENT_VALID_SMM);
	if (vcpu->kvm->arch.exception_payload_enabled)
		events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;

	memset(&events->reserved, 0, sizeof(events->reserved));
}

static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);

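/*
 * KVM_SET_VCPU_EVENTS: restore the event state captured above.  Unknown flag
 * bits are rejected up front; the optional pieces (interrupt shadow, SIPI
 * vector, NMI pending count, SMM state, exception payload) are only applied
 * when the corresponding KVM_VCPUEVENT_VALID_* flag is set.
 */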
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
					      struct kvm_vcpu_events *events)
{
	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR
			      | KVM_VCPUEVENT_VALID_SHADOW
			      | KVM_VCPUEVENT_VALID_SMM
			      | KVM_VCPUEVENT_VALID_PAYLOAD))