1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29
30#include <linux/clocksource.h>
31#include <linux/interrupt.h>
32#include <linux/kvm.h>
33#include <linux/fs.h>
34#include <linux/vmalloc.h>
35#include <linux/module.h>
36#include <linux/mman.h>
37#include <linux/highmem.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/cpufreq.h>
41#include <linux/user-return-notifier.h>
42#include <linux/srcu.h>
43#include <linux/slab.h>
44#include <linux/perf_event.h>
45#include <linux/uaccess.h>
46#include <linux/hash.h>
47#include <linux/pci.h>
48#include <trace/events/kvm.h>
49
50#define CREATE_TRACE_POINTS
51#include "trace.h"
52
53#include <asm/debugreg.h>
54#include <asm/msr.h>
55#include <asm/desc.h>
56#include <asm/mtrr.h>
57#include <asm/mce.h>
58#include <asm/i387.h>
59#include <asm/xcr.h>
60#include <asm/pvclock.h>
61#include <asm/div64.h>
62
63#define MAX_IO_MSRS 256
64#define KVM_MAX_MCE_BANKS 32
65#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
66
67#define emul_to_vcpu(ctxt) \
68 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
69
70
71
72
73
74#ifdef CONFIG_X86_64
75static
76u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
77#else
78static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
79#endif
80
81#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
82#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
83
84static void update_cr8_intercept(struct kvm_vcpu *vcpu);
85static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
86 struct kvm_cpuid_entry2 __user *entries);
87static void process_nmi(struct kvm_vcpu *vcpu);
88
89struct kvm_x86_ops *kvm_x86_ops;
90EXPORT_SYMBOL_GPL(kvm_x86_ops);
91
92int ignore_msrs = 0;
93module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
94
95bool kvm_has_tsc_control;
96EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
97u32 kvm_max_guest_tsc_khz;
98EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
99
100#define KVM_NR_SHARED_MSRS 16
101
102struct kvm_shared_msrs_global {
103 int nr;
104 u32 msrs[KVM_NR_SHARED_MSRS];
105};
106
107struct kvm_shared_msrs {
108 struct user_return_notifier urn;
109 bool registered;
110 struct kvm_shared_msr_values {
111 u64 host;
112 u64 curr;
113 } values[KVM_NR_SHARED_MSRS];
114};
115
116static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
117static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
118
119struct kvm_stats_debugfs_item debugfs_entries[] = {
120 { "pf_fixed", VCPU_STAT(pf_fixed) },
121 { "pf_guest", VCPU_STAT(pf_guest) },
122 { "tlb_flush", VCPU_STAT(tlb_flush) },
123 { "invlpg", VCPU_STAT(invlpg) },
124 { "exits", VCPU_STAT(exits) },
125 { "io_exits", VCPU_STAT(io_exits) },
126 { "mmio_exits", VCPU_STAT(mmio_exits) },
127 { "signal_exits", VCPU_STAT(signal_exits) },
128 { "irq_window", VCPU_STAT(irq_window_exits) },
129 { "nmi_window", VCPU_STAT(nmi_window_exits) },
130 { "halt_exits", VCPU_STAT(halt_exits) },
131 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
132 { "hypercalls", VCPU_STAT(hypercalls) },
133 { "request_irq", VCPU_STAT(request_irq_exits) },
134 { "irq_exits", VCPU_STAT(irq_exits) },
135 { "host_state_reload", VCPU_STAT(host_state_reload) },
136 { "efer_reload", VCPU_STAT(efer_reload) },
137 { "fpu_reload", VCPU_STAT(fpu_reload) },
138 { "insn_emulation", VCPU_STAT(insn_emulation) },
139 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
140 { "irq_injections", VCPU_STAT(irq_injections) },
141 { "nmi_injections", VCPU_STAT(nmi_injections) },
142 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
143 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
144 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
145 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
146 { "mmu_flooded", VM_STAT(mmu_flooded) },
147 { "mmu_recycled", VM_STAT(mmu_recycled) },
148 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
149 { "mmu_unsync", VM_STAT(mmu_unsync) },
150 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
151 { "largepages", VM_STAT(lpages) },
152 { NULL }
153};
154
155u64 __read_mostly host_xcr0;
156
157int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
158
159static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
160{
161 int i;
162 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
163 vcpu->arch.apf.gfns[i] = ~0;
164}
165
166static void kvm_on_user_return(struct user_return_notifier *urn)
167{
168 unsigned slot;
169 struct kvm_shared_msrs *locals
170 = container_of(urn, struct kvm_shared_msrs, urn);
171 struct kvm_shared_msr_values *values;
172
173 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
174 values = &locals->values[slot];
175 if (values->host != values->curr) {
176 wrmsrl(shared_msrs_global.msrs[slot], values->host);
177 values->curr = values->host;
178 }
179 }
180 locals->registered = false;
181 user_return_notifier_unregister(urn);
182}
183
184static void shared_msr_update(unsigned slot, u32 msr)
185{
186 struct kvm_shared_msrs *smsr;
187 u64 value;
188
189 smsr = &__get_cpu_var(shared_msrs);
190
191
192 if (slot >= shared_msrs_global.nr) {
193 printk(KERN_ERR "kvm: invalid MSR slot!");
194 return;
195 }
196 rdmsrl_safe(msr, &value);
197 smsr->values[slot].host = value;
198 smsr->values[slot].curr = value;
199}
200
201void kvm_define_shared_msr(unsigned slot, u32 msr)
202{
203 if (slot >= shared_msrs_global.nr)
204 shared_msrs_global.nr = slot + 1;
205 shared_msrs_global.msrs[slot] = msr;
206
207 smp_wmb();
208}
209EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
210
211static void kvm_shared_msr_cpu_online(void)
212{
213 unsigned i;
214
215 for (i = 0; i < shared_msrs_global.nr; ++i)
216 shared_msr_update(i, shared_msrs_global.msrs[i]);
217}
218
219void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
220{
221 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
222
223 if (((value ^ smsr->values[slot].curr) & mask) == 0)
224 return;
225 smsr->values[slot].curr = value;
226 wrmsrl(shared_msrs_global.msrs[slot], value);
227 if (!smsr->registered) {
228 smsr->urn.on_user_return = kvm_on_user_return;
229 user_return_notifier_register(&smsr->urn);
230 smsr->registered = true;
231 }
232}
233EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
234
235static void drop_user_return_notifiers(void *ignore)
236{
237 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
238
239 if (smsr->registered)
240 kvm_on_user_return(&smsr->urn);
241}
242
243u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
244{
245 if (irqchip_in_kernel(vcpu->kvm))
246 return vcpu->arch.apic_base;
247 else
248 return vcpu->arch.apic_base;
249}
250EXPORT_SYMBOL_GPL(kvm_get_apic_base);
251
252void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
253{
254
255 if (irqchip_in_kernel(vcpu->kvm))
256 kvm_lapic_set_base(vcpu, data);
257 else
258 vcpu->arch.apic_base = data;
259}
260EXPORT_SYMBOL_GPL(kvm_set_apic_base);
261
262#define EXCPT_BENIGN 0
263#define EXCPT_CONTRIBUTORY 1
264#define EXCPT_PF 2
265
266static int exception_class(int vector)
267{
268 switch (vector) {
269 case PF_VECTOR:
270 return EXCPT_PF;
271 case DE_VECTOR:
272 case TS_VECTOR:
273 case NP_VECTOR:
274 case SS_VECTOR:
275 case GP_VECTOR:
276 return EXCPT_CONTRIBUTORY;
277 default:
278 break;
279 }
280 return EXCPT_BENIGN;
281}
282
283static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
284 unsigned nr, bool has_error, u32 error_code,
285 bool reinject)
286{
287 u32 prev_nr;
288 int class1, class2;
289
290 kvm_make_request(KVM_REQ_EVENT, vcpu);
291
292 if (!vcpu->arch.exception.pending) {
293 queue:
294 vcpu->arch.exception.pending = true;
295 vcpu->arch.exception.has_error_code = has_error;
296 vcpu->arch.exception.nr = nr;
297 vcpu->arch.exception.error_code = error_code;
298 vcpu->arch.exception.reinject = reinject;
299 return;
300 }
301
302
303 prev_nr = vcpu->arch.exception.nr;
304 if (prev_nr == DF_VECTOR) {
305
306 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
307 return;
308 }
309 class1 = exception_class(prev_nr);
310 class2 = exception_class(nr);
311 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
312 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
313
314 vcpu->arch.exception.pending = true;
315 vcpu->arch.exception.has_error_code = true;
316 vcpu->arch.exception.nr = DF_VECTOR;
317 vcpu->arch.exception.error_code = 0;
318 } else
319
320
321
322 goto queue;
323}
324
325void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
326{
327 kvm_multiple_exception(vcpu, nr, false, 0, false);
328}
329EXPORT_SYMBOL_GPL(kvm_queue_exception);
330
331void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
332{
333 kvm_multiple_exception(vcpu, nr, false, 0, true);
334}
335EXPORT_SYMBOL_GPL(kvm_requeue_exception);
336
337void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
338{
339 if (err)
340 kvm_inject_gp(vcpu, 0);
341 else
342 kvm_x86_ops->skip_emulated_instruction(vcpu);
343}
344EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
345
346void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
347{
348 ++vcpu->stat.pf_guest;
349 vcpu->arch.cr2 = fault->address;
350 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
351}
352EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
353
354void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
355{
356 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
357 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
358 else
359 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
360}
361
362void kvm_inject_nmi(struct kvm_vcpu *vcpu)
363{
364 atomic_inc(&vcpu->arch.nmi_queued);
365 kvm_make_request(KVM_REQ_NMI, vcpu);
366}
367EXPORT_SYMBOL_GPL(kvm_inject_nmi);
368
369void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
370{
371 kvm_multiple_exception(vcpu, nr, true, error_code, false);
372}
373EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
374
375void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
376{
377 kvm_multiple_exception(vcpu, nr, true, error_code, true);
378}
379EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
380
381
382
383
384
385bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
386{
387 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
388 return true;
389 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
390 return false;
391}
392EXPORT_SYMBOL_GPL(kvm_require_cpl);
393
394
395
396
397
398
399int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
400 gfn_t ngfn, void *data, int offset, int len,
401 u32 access)
402{
403 gfn_t real_gfn;
404 gpa_t ngpa;
405
406 ngpa = gfn_to_gpa(ngfn);
407 real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
408 if (real_gfn == UNMAPPED_GVA)
409 return -EFAULT;
410
411 real_gfn = gpa_to_gfn(real_gfn);
412
413 return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
414}
415EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
416
417int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
418 void *data, int offset, int len, u32 access)
419{
420 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
421 data, offset, len, access);
422}
423
424
425
426
427int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
428{
429 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
430 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
431 int i;
432 int ret;
433 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
434
435 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
436 offset * sizeof(u64), sizeof(pdpte),
437 PFERR_USER_MASK|PFERR_WRITE_MASK);
438 if (ret < 0) {
439 ret = 0;
440 goto out;
441 }
442 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
443 if (is_present_gpte(pdpte[i]) &&
444 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
445 ret = 0;
446 goto out;
447 }
448 }
449 ret = 1;
450
451 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
452 __set_bit(VCPU_EXREG_PDPTR,
453 (unsigned long *)&vcpu->arch.regs_avail);
454 __set_bit(VCPU_EXREG_PDPTR,
455 (unsigned long *)&vcpu->arch.regs_dirty);
456out:
457
458 return ret;
459}
460EXPORT_SYMBOL_GPL(load_pdptrs);
461
462static bool pdptrs_changed(struct kvm_vcpu *vcpu)
463{
464 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
465 bool changed = true;
466 int offset;
467 gfn_t gfn;
468 int r;
469
470 if (is_long_mode(vcpu) || !is_pae(vcpu))
471 return false;
472
473 if (!test_bit(VCPU_EXREG_PDPTR,
474 (unsigned long *)&vcpu->arch.regs_avail))
475 return true;
476
477 gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
478 offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
479 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
480 PFERR_USER_MASK | PFERR_WRITE_MASK);
481 if (r < 0)
482 goto out;
483 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
484out:
485
486 return changed;
487}
488
489int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
490{
491 unsigned long old_cr0 = kvm_read_cr0(vcpu);
492 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
493 X86_CR0_CD | X86_CR0_NW;
494
495 cr0 |= X86_CR0_ET;
496
497#ifdef CONFIG_X86_64
498 if (cr0 & 0xffffffff00000000UL)
499 return 1;
500#endif
501
502 cr0 &= ~CR0_RESERVED_BITS;
503
504 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
505 return 1;
506
507 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
508 return 1;
509
510 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
511#ifdef CONFIG_X86_64
512 if ((vcpu->arch.efer & EFER_LME)) {
513 int cs_db, cs_l;
514
515 if (!is_pae(vcpu))
516 return 1;
517 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
518 if (cs_l)
519 return 1;
520 } else
521#endif
522 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
523 kvm_read_cr3(vcpu)))
524 return 1;
525 }
526
527 kvm_x86_ops->set_cr0(vcpu, cr0);
528
529 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
530 kvm_clear_async_pf_completion_queue(vcpu);
531 kvm_async_pf_hash_reset(vcpu);
532 }
533
534 if ((cr0 ^ old_cr0) & update_bits)
535 kvm_mmu_reset_context(vcpu);
536 return 0;
537}
538EXPORT_SYMBOL_GPL(kvm_set_cr0);
539
/*
 * Emulate LMSW: merge the low four bits of @msw into CR0. Bits 1-3 of the
 * current CR0 are masked out before the merge while bit 0 is kept, so this
 * path can set bit 0 but never clear it. The result of kvm_set_cr0() is
 * deliberately ignored.
 */
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	unsigned long new_cr0 = kvm_read_cr0_bits(vcpu, ~0x0eul);

	new_cr0 |= msw & 0x0f;
	(void)kvm_set_cr0(vcpu, new_cr0);
}
544EXPORT_SYMBOL_GPL(kvm_lmsw);
545
546int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
547{
548 u64 xcr0;
549
550
551 if (index != XCR_XFEATURE_ENABLED_MASK)
552 return 1;
553 xcr0 = xcr;
554 if (kvm_x86_ops->get_cpl(vcpu) != 0)
555 return 1;
556 if (!(xcr0 & XSTATE_FP))
557 return 1;
558 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
559 return 1;
560 if (xcr0 & ~host_xcr0)
561 return 1;
562 vcpu->arch.xcr0 = xcr0;
563 vcpu->guest_xcr0_loaded = 0;
564 return 0;
565}
566
567int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
568{
569 if (__kvm_set_xcr(vcpu, index, xcr)) {
570 kvm_inject_gp(vcpu, 0);
571 return 1;
572 }
573 return 0;
574}
575EXPORT_SYMBOL_GPL(kvm_set_xcr);
576
577static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
578{
579 struct kvm_cpuid_entry2 *best;
580
581 best = kvm_find_cpuid_entry(vcpu, 1, 0);
582 return best && (best->ecx & bit(X86_FEATURE_XSAVE));
583}
584
585static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
586{
587 struct kvm_cpuid_entry2 *best;
588
589 best = kvm_find_cpuid_entry(vcpu, 7, 0);
590 return best && (best->ebx & bit(X86_FEATURE_SMEP));
591}
592
593static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
594{
595 struct kvm_cpuid_entry2 *best;
596
597 best = kvm_find_cpuid_entry(vcpu, 7, 0);
598 return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
599}
600
601static void update_cpuid(struct kvm_vcpu *vcpu)
602{
603 struct kvm_cpuid_entry2 *best;
604 struct kvm_lapic *apic = vcpu->arch.apic;
605
606 best = kvm_find_cpuid_entry(vcpu, 1, 0);
607 if (!best)
608 return;
609
610
611 if (cpu_has_xsave && best->function == 0x1) {
612 best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
613 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
614 best->ecx |= bit(X86_FEATURE_OSXSAVE);
615 }
616
617 if (apic) {
618 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
619 apic->lapic_timer.timer_mode_mask = 3 << 17;
620 else
621 apic->lapic_timer.timer_mode_mask = 1 << 17;
622 }
623}
624
625int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
626{
627 unsigned long old_cr4 = kvm_read_cr4(vcpu);
628 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
629 X86_CR4_PAE | X86_CR4_SMEP;
630 if (cr4 & CR4_RESERVED_BITS)
631 return 1;
632
633 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
634 return 1;
635
636 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
637 return 1;
638
639 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
640 return 1;
641
642 if (is_long_mode(vcpu)) {
643 if (!(cr4 & X86_CR4_PAE))
644 return 1;
645 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
646 && ((cr4 ^ old_cr4) & pdptr_bits)
647 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
648 kvm_read_cr3(vcpu)))
649 return 1;
650
651 if (kvm_x86_ops->set_cr4(vcpu, cr4))
652 return 1;
653
654 if ((cr4 ^ old_cr4) & pdptr_bits)
655 kvm_mmu_reset_context(vcpu);
656
657 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
658 update_cpuid(vcpu);
659
660 return 0;
661}
662EXPORT_SYMBOL_GPL(kvm_set_cr4);
663
664int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
665{
666 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
667 kvm_mmu_sync_roots(vcpu);
668 kvm_mmu_flush_tlb(vcpu);
669 return 0;
670 }
671
672 if (is_long_mode(vcpu)) {
673 if (cr3 & CR3_L_MODE_RESERVED_BITS)
674 return 1;
675 } else {
676 if (is_pae(vcpu)) {
677 if (cr3 & CR3_PAE_RESERVED_BITS)
678 return 1;
679 if (is_paging(vcpu) &&
680 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
681 return 1;
682 }
683
684
685
686
687 }
688
689
690
691
692
693
694
695
696
697
698 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
699 return 1;
700 vcpu->arch.cr3 = cr3;
701 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
702 vcpu->arch.mmu.new_cr3(vcpu);
703 return 0;
704}
705EXPORT_SYMBOL_GPL(kvm_set_cr3);
706
707int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
708{
709 if (cr8 & CR8_RESERVED_BITS)
710 return 1;
711 if (irqchip_in_kernel(vcpu->kvm))
712 kvm_lapic_set_tpr(vcpu, cr8);
713 else
714 vcpu->arch.cr8 = cr8;
715 return 0;
716}
717EXPORT_SYMBOL_GPL(kvm_set_cr8);
718
719unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
720{
721 if (irqchip_in_kernel(vcpu->kvm))
722 return kvm_lapic_get_cr8(vcpu);
723 else
724 return vcpu->arch.cr8;
725}
726EXPORT_SYMBOL_GPL(kvm_get_cr8);
727
728static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
729{
730 switch (dr) {
731 case 0 ... 3:
732 vcpu->arch.db[dr] = val;
733 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
734 vcpu->arch.eff_db[dr] = val;
735 break;
736 case 4:
737 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
738 return 1;
739
740 case 6:
741 if (val & 0xffffffff00000000ULL)
742 return -1;
743 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
744 break;
745 case 5:
746 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
747 return 1;
748
749 default:
750 if (val & 0xffffffff00000000ULL)
751 return -1;
752 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
753 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
754 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
755 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
756 }
757 break;
758 }
759
760 return 0;
761}
762
763int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
764{
765 int res;
766
767 res = __kvm_set_dr(vcpu, dr, val);
768 if (res > 0)
769 kvm_queue_exception(vcpu, UD_VECTOR);
770 else if (res < 0)
771 kvm_inject_gp(vcpu, 0);
772
773 return res;
774}
775EXPORT_SYMBOL_GPL(kvm_set_dr);
776
777static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
778{
779 switch (dr) {
780 case 0 ... 3:
781 *val = vcpu->arch.db[dr];
782 break;
783 case 4:
784 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
785 return 1;
786
787 case 6:
788 *val = vcpu->arch.dr6;
789 break;
790 case 5:
791 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
792 return 1;
793
794 default:
795 *val = vcpu->arch.dr7;
796 break;
797 }
798
799 return 0;
800}
801
802int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
803{
804 if (_kvm_get_dr(vcpu, dr, val)) {
805 kvm_queue_exception(vcpu, UD_VECTOR);
806 return 1;
807 }
808 return 0;
809}
810EXPORT_SYMBOL_GPL(kvm_get_dr);
811
812
813
814
815
816
817
818
819
820
821#define KVM_SAVE_MSRS_BEGIN 9
822static u32 msrs_to_save[] = {
823 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
824 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
825 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
826 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
827 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
828 MSR_STAR,
829#ifdef CONFIG_X86_64
830 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
831#endif
832 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
833};
834
835static unsigned num_msrs_to_save;
836
837static u32 emulated_msrs[] = {
838 MSR_IA32_TSCDEADLINE,
839 MSR_IA32_MISC_ENABLE,
840 MSR_IA32_MCG_STATUS,
841 MSR_IA32_MCG_CTL,
842};
843
844static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
845{
846 u64 old_efer = vcpu->arch.efer;
847
848 if (efer & efer_reserved_bits)
849 return 1;
850
851 if (is_paging(vcpu)
852 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
853 return 1;
854
855 if (efer & EFER_FFXSR) {
856 struct kvm_cpuid_entry2 *feat;
857
858 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
859 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
860 return 1;
861 }
862
863 if (efer & EFER_SVME) {
864 struct kvm_cpuid_entry2 *feat;
865
866 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
867 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
868 return 1;
869 }
870
871 efer &= ~EFER_LMA;
872 efer |= vcpu->arch.efer & EFER_LMA;
873
874 kvm_x86_ops->set_efer(vcpu, efer);
875
876 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
877
878
879 if ((efer ^ old_efer) & EFER_NX)
880 kvm_mmu_reset_context(vcpu);
881
882 return 0;
883}
884
885void kvm_enable_efer_bits(u64 mask)
886{
887 efer_reserved_bits &= ~mask;
888}
889EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
890
891
892
893
894
895
896
897int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
898{
899 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
900}
901
902
903
904
905static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
906{
907 return kvm_set_msr(vcpu, index, *data);
908}
909
910static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
911{
912 int version;
913 int r;
914 struct pvclock_wall_clock wc;
915 struct timespec boot;
916
917 if (!wall_clock)
918 return;
919
920 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
921 if (r)
922 return;
923
924 if (version & 1)
925 ++version;
926
927 ++version;
928
929 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
930
931
932
933
934
935
936
937 getboottime(&boot);
938
939 wc.sec = boot.tv_sec;
940 wc.nsec = boot.tv_nsec;
941 wc.version = version;
942
943 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
944
945 version++;
946 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
947}
948
/*
 * Return (dividend << 32) / divisor as a 32-bit quotient, using a single
 * x86 DIVL with EDX:EAX = dividend:0. The remainder is discarded.
 */
static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t q, r;

	__asm__ ("divl %4"
		 : "=a" (q), "=d" (r)
		 : "0" (0), "1" (dividend), "r" (divisor));

	return q;
}
960
961static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
962 s8 *pshift, u32 *pmultiplier)
963{
964 uint64_t scaled64;
965 int32_t shift = 0;
966 uint64_t tps64;
967 uint32_t tps32;
968
969 tps64 = base_khz * 1000LL;
970 scaled64 = scaled_khz * 1000LL;
971 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
972 tps64 >>= 1;
973 shift--;
974 }
975
976 tps32 = (uint32_t)tps64;
977 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
978 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
979 scaled64 >>= 1;
980 else
981 tps32 <<= 1;
982 shift++;
983 }
984
985 *pshift = shift;
986 *pmultiplier = div_frac(scaled64, tps32);
987
988 pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
989 __func__, base_khz, scaled_khz, shift, *pmultiplier);
990}
991
992static inline u64 get_kernel_ns(void)
993{
994 struct timespec ts;
995
996 WARN_ON(preemptible());
997 ktime_get_ts(&ts);
998 monotonic_to_bootbased(&ts);
999 return timespec_to_ns(&ts);
1000}
1001
1002static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1003unsigned long max_tsc_khz;
1004
1005static inline int kvm_tsc_changes_freq(void)
1006{
1007 int cpu = get_cpu();
1008 int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
1009 cpufreq_quick_get(cpu) != 0;
1010 put_cpu();
1011 return ret;
1012}
1013
1014u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
1015{
1016 if (vcpu->arch.virtual_tsc_khz)
1017 return vcpu->arch.virtual_tsc_khz;
1018 else
1019 return __this_cpu_read(cpu_tsc_khz);
1020}
1021
1022static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
1023{
1024 u64 ret;
1025
1026 WARN_ON(preemptible());
1027 if (kvm_tsc_changes_freq())
1028 printk_once(KERN_WARNING
1029 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
1030 ret = nsec * vcpu_tsc_khz(vcpu);
1031 do_div(ret, USEC_PER_SEC);
1032 return ret;
1033}
1034
1035static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1036{
1037
1038 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1039 &vcpu->arch.tsc_catchup_shift,
1040 &vcpu->arch.tsc_catchup_mult);
1041}
1042
1043static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1044{
1045 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
1046 vcpu->arch.tsc_catchup_mult,
1047 vcpu->arch.tsc_catchup_shift);
1048 tsc += vcpu->arch.last_tsc_write;
1049 return tsc;
1050}
1051
1052void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1053{
1054 struct kvm *kvm = vcpu->kvm;
1055 u64 offset, ns, elapsed;
1056 unsigned long flags;
1057 s64 sdiff;
1058
1059 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1060 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1061 ns = get_kernel_ns();
1062 elapsed = ns - kvm->arch.last_tsc_nsec;
1063 sdiff = data - kvm->arch.last_tsc_write;
1064 if (sdiff < 0)
1065 sdiff = -sdiff;
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
1077 elapsed < 5ULL * NSEC_PER_SEC) {
1078 if (!check_tsc_unstable()) {
1079 offset = kvm->arch.last_tsc_offset;
1080 pr_debug("kvm: matched tsc offset for %llu\n", data);
1081 } else {
1082 u64 delta = nsec_to_cycles(vcpu, elapsed);
1083 offset += delta;
1084 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1085 }
1086 ns = kvm->arch.last_tsc_nsec;
1087 }
1088 kvm->arch.last_tsc_nsec = ns;
1089 kvm->arch.last_tsc_write = data;
1090 kvm->arch.last_tsc_offset = offset;
1091 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1092 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1093
1094
1095 vcpu->arch.hv_clock.tsc_timestamp = 0;
1096 vcpu->arch.last_tsc_write = data;
1097 vcpu->arch.last_tsc_nsec = ns;
1098}
1099EXPORT_SYMBOL_GPL(kvm_write_tsc);
1100
1101static int kvm_guest_time_update(struct kvm_vcpu *v)
1102{
1103 unsigned long flags;
1104 struct kvm_vcpu_arch *vcpu = &v->arch;
1105 void *shared_kaddr;
1106 unsigned long this_tsc_khz;
1107 s64 kernel_ns, max_kernel_ns;
1108 u64 tsc_timestamp;
1109
1110
1111 local_irq_save(flags);
1112 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1113 kernel_ns = get_kernel_ns();
1114 this_tsc_khz = vcpu_tsc_khz(v);
1115 if (unlikely(this_tsc_khz == 0)) {
1116 local_irq_restore(flags);
1117 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1118 return 1;
1119 }
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131 if (vcpu->tsc_catchup) {
1132 u64 tsc = compute_guest_tsc(v, kernel_ns);
1133 if (tsc > tsc_timestamp) {
1134 kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
1135 tsc_timestamp = tsc;
1136 }
1137 }
1138
1139 local_irq_restore(flags);
1140
1141 if (!vcpu->time_page)
1142 return 0;
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165 max_kernel_ns = 0;
1166 if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
1167 max_kernel_ns = vcpu->last_guest_tsc -
1168 vcpu->hv_clock.tsc_timestamp;
1169 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1170 vcpu->hv_clock.tsc_to_system_mul,
1171 vcpu->hv_clock.tsc_shift);
1172 max_kernel_ns += vcpu->last_kernel_ns;
1173 }
1174
1175 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1176 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1177 &vcpu->hv_clock.tsc_shift,
1178 &vcpu->hv_clock.tsc_to_system_mul);
1179 vcpu->hw_tsc_khz = this_tsc_khz;
1180 }
1181
1182 if (max_kernel_ns > kernel_ns)
1183 kernel_ns = max_kernel_ns;
1184
1185
1186 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1187 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1188 vcpu->last_kernel_ns = kernel_ns;
1189 vcpu->last_guest_tsc = tsc_timestamp;
1190 vcpu->hv_clock.flags = 0;
1191
1192
1193
1194
1195
1196
1197 vcpu->hv_clock.version += 2;
1198
1199 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
1200
1201 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1202 sizeof(vcpu->hv_clock));
1203
1204 kunmap_atomic(shared_kaddr, KM_USER0);
1205
1206 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1207 return 0;
1208}
1209
1210static bool msr_mtrr_valid(unsigned msr)
1211{
1212 switch (msr) {
1213 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
1214 case MSR_MTRRfix64K_00000:
1215 case MSR_MTRRfix16K_80000:
1216 case MSR_MTRRfix16K_A0000:
1217 case MSR_MTRRfix4K_C0000:
1218 case MSR_MTRRfix4K_C8000:
1219 case MSR_MTRRfix4K_D0000:
1220 case MSR_MTRRfix4K_D8000:
1221 case MSR_MTRRfix4K_E0000:
1222 case MSR_MTRRfix4K_E8000:
1223 case MSR_MTRRfix4K_F0000:
1224 case MSR_MTRRfix4K_F8000:
1225 case MSR_MTRRdefType:
1226 case MSR_IA32_CR_PAT:
1227 return true;
1228 case 0x2f8:
1229 return true;
1230 }
1231 return false;
1232}
1233
/*
 * True when @t is an accepted PAT memory type, i.e. one of
 * 0, 1, 4, 5, 6 or 7 (bitmask 0xf3).
 */
static bool valid_pat_type(unsigned t)
{
	if (t >= 8)
		return false;

	return ((1 << t) & 0xf3) != 0;
}
1238
/*
 * True when @t is an accepted MTRR memory type, i.e. one of
 * 0, 1, 4, 5 or 6 (bitmask 0x73).
 */
static bool valid_mtrr_type(unsigned t)
{
	if (t >= 8)
		return false;

	return ((1 << t) & 0x73) != 0;
}
1243
1244static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1245{
1246 int i;
1247
1248 if (!msr_mtrr_valid(msr))
1249 return false;
1250
1251 if (msr == MSR_IA32_CR_PAT) {
1252 for (i = 0; i < 8; i++)
1253 if (!valid_pat_type((data >> (i * 8)) & 0xff))
1254 return false;
1255 return true;
1256 } else if (msr == MSR_MTRRdefType) {
1257 if (data & ~0xcff)
1258 return false;
1259 return valid_mtrr_type(data & 0xff);
1260 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
1261 for (i = 0; i < 8 ; i++)
1262 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
1263 return false;
1264 return true;
1265 }
1266
1267
1268 return valid_mtrr_type(data & 0xff);
1269}
1270
1271static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1272{
1273 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1274
1275 if (!mtrr_valid(vcpu, msr, data))
1276 return 1;
1277
1278 if (msr == MSR_MTRRdefType) {
1279 vcpu->arch.mtrr_state.def_type = data;
1280 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
1281 } else if (msr == MSR_MTRRfix64K_00000)
1282 p[0] = data;
1283 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1284 p[1 + msr - MSR_MTRRfix16K_80000] = data;
1285 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1286 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
1287 else if (msr == MSR_IA32_CR_PAT)
1288 vcpu->arch.pat = data;
1289 else {
1290 int idx, is_mtrr_mask;
1291 u64 *pt;
1292
1293 idx = (msr - 0x200) / 2;
1294 is_mtrr_mask = msr - 0x200 - 2 * idx;
1295 if (!is_mtrr_mask)
1296 pt =
1297 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1298 else
1299 pt =
1300 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1301 *pt = data;
1302 }
1303
1304 kvm_mmu_reset_context(vcpu);
1305 return 0;
1306}
1307
1308static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1309{
1310 u64 mcg_cap = vcpu->arch.mcg_cap;
1311 unsigned bank_num = mcg_cap & 0xff;
1312
1313 switch (msr) {
1314 case MSR_IA32_MCG_STATUS:
1315 vcpu->arch.mcg_status = data;
1316 break;
1317 case MSR_IA32_MCG_CTL:
1318 if (!(mcg_cap & MCG_CTL_P))
1319 return 1;
1320 if (data != 0 && data != ~(u64)0)
1321 return -1;
1322 vcpu->arch.mcg_ctl = data;
1323 break;
1324 default:
1325 if (msr >= MSR_IA32_MC0_CTL &&
1326 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1327 u32 offset = msr - MSR_IA32_MC0_CTL;
1328
1329
1330
1331
1332
1333 if ((offset & 0x3) == 0 &&
1334 data != 0 && (data | (1 << 10)) != ~(u64)0)
1335 return -1;
1336 vcpu->arch.mce_banks[offset] = data;
1337 break;
1338 }
1339 return 1;
1340 }
1341 return 0;
1342}
1343
1344static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1345{
1346 struct kvm *kvm = vcpu->kvm;
1347 int lm = is_long_mode(vcpu);
1348 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1349 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1350 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1351 : kvm->arch.xen_hvm_config.blob_size_32;
1352 u32 page_num = data & ~PAGE_MASK;
1353 u64 page_addr = data & PAGE_MASK;
1354 u8 *page;
1355 int r;
1356
1357 r = -E2BIG;
1358 if (page_num >= blob_size)
1359 goto out;
1360 r = -ENOMEM;
1361 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1362 if (!page)
1363 goto out;
1364 r = -EFAULT;
1365 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
1366 goto out_free;
1367 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
1368 goto out_free;
1369 r = 0;
1370out_free:
1371 kfree(page);
1372out:
1373 return r;
1374}
1375
1376static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1377{
1378 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1379}
1380
1381static bool kvm_hv_msr_partition_wide(u32 msr)
1382{
1383 bool r = false;
1384 switch (msr) {
1385 case HV_X64_MSR_GUEST_OS_ID:
1386 case HV_X64_MSR_HYPERCALL:
1387 r = true;
1388 break;
1389 }
1390
1391 return r;
1392}
1393
1394static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1395{
1396 struct kvm *kvm = vcpu->kvm;
1397
1398 switch (msr) {
1399 case HV_X64_MSR_GUEST_OS_ID:
1400 kvm->arch.hv_guest_os_id = data;
1401
1402 if (!kvm->arch.hv_guest_os_id)
1403 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1404 break;
1405 case HV_X64_MSR_HYPERCALL: {
1406 u64 gfn;
1407 unsigned long addr;
1408 u8 instructions[4];
1409
1410
1411 if (!kvm->arch.hv_guest_os_id)
1412 break;
1413 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1414 kvm->arch.hv_hypercall = data;
1415 break;
1416 }
1417 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1418 addr = gfn_to_hva(kvm, gfn);
1419 if (kvm_is_error_hva(addr))
1420 return 1;
1421 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1422 ((unsigned char *)instructions)[3] = 0xc3;
1423 if (__copy_to_user((void __user *)addr, instructions, 4))
1424 return 1;
1425 kvm->arch.hv_hypercall = data;
1426 break;
1427 }
1428 default:
1429 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1430 "data 0x%llx\n", msr, data);
1431 return 1;
1432 }
1433 return 0;
1434}
1435
1436static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1437{
1438 switch (msr) {
1439 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1440 unsigned long addr;
1441
1442 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1443 vcpu->arch.hv_vapic = data;
1444 break;
1445 }
1446 addr = gfn_to_hva(vcpu->kvm, data >>
1447 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1448 if (kvm_is_error_hva(addr))
1449 return 1;
1450 if (__clear_user((void __user *)addr, PAGE_SIZE))
1451 return 1;
1452 vcpu->arch.hv_vapic = data;
1453 break;
1454 }
1455 case HV_X64_MSR_EOI:
1456 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1457 case HV_X64_MSR_ICR:
1458 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1459 case HV_X64_MSR_TPR:
1460 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1461 default:
1462 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1463 "data 0x%llx\n", msr, data);
1464 return 1;
1465 }
1466
1467 return 0;
1468}
1469
1470static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1471{
1472 gpa_t gpa = data & ~0x3f;
1473
1474
1475 if (data & 0x3c)
1476 return 1;
1477
1478 vcpu->arch.apf.msr_val = data;
1479
1480 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1481 kvm_clear_async_pf_completion_queue(vcpu);
1482 kvm_async_pf_hash_reset(vcpu);
1483 return 0;
1484 }
1485
1486 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
1487 return 1;
1488
1489 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
1490 kvm_async_pf_wakeup_all(vcpu);
1491 return 0;
1492}
1493
1494static void kvmclock_reset(struct kvm_vcpu *vcpu)
1495{
1496 if (vcpu->arch.time_page) {
1497 kvm_release_page_dirty(vcpu->arch.time_page);
1498 vcpu->arch.time_page = NULL;
1499 }
1500}
1501
1502static void accumulate_steal_time(struct kvm_vcpu *vcpu)
1503{
1504 u64 delta;
1505
1506 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1507 return;
1508
1509 delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
1510 vcpu->arch.st.last_steal = current->sched_info.run_delay;
1511 vcpu->arch.st.accum_steal = delta;
1512}
1513
1514static void record_steal_time(struct kvm_vcpu *vcpu)
1515{
1516 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1517 return;
1518
1519 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1520 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
1521 return;
1522
1523 vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
1524 vcpu->arch.st.steal.version += 2;
1525 vcpu->arch.st.accum_steal = 0;
1526
1527 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1528 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
1529}
1530
/*
 * Vendor-independent handling of a write of @data to MSR @msr.
 *
 * Returns 0 when the write was accepted (including writes that are
 * deliberately ignored) and a non-zero value when it was rejected.  Several
 * groups of MSRs are delegated to helpers whose return value is passed
 * through unchanged.
 */
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case MSR_EFER:
		return set_efer(vcpu, data);
	case MSR_K7_HWCR:
		/* bits 6 and 8 are tolerated and dropped; any other bit is refused */
		data &= ~(u64)0x40;
		data &= ~(u64)0x100;
		if (data != 0) {
			pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
				data);
			return 1;
		}
		break;
	case MSR_FAM10H_MMIO_CONF_BASE:
		/* only a write of zero is accepted */
		if (data != 0) {
			pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
				"0x%llx\n", data);
			return 1;
		}
		break;
	case MSR_AMD64_NB_CFG:
		/* any value is silently accepted and discarded */
		break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* writing zero needs no action */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/* any bit other than LBR/BTF is refused */
			return 1;
		}
		/* LBR/BTF are accepted but have no effect; just log the write */
		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
			__func__, data);
		break;
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case MSR_VM_HSAVE_PA:
	case MSR_AMD64_PATCH_LOADER:
		/* writes are silently ignored */
		break;
	case 0x200 ... 0x2ff:
		/* MTRR and PAT registers */
		return set_msr_mtrr(vcpu, msr, data);
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_write(vcpu, msr, data);
	case MSR_IA32_TSCDEADLINE:
		kvm_set_lapic_tscdeadline_msr(vcpu, data);
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
	case MSR_KVM_WALL_CLOCK_NEW:
	case MSR_KVM_WALL_CLOCK:
		/* remember the address and publish the wall clock there */
		vcpu->kvm->arch.wall_clock = data;
		kvm_write_wall_clock(vcpu->kvm, data);
		break;
	case MSR_KVM_SYSTEM_TIME_NEW:
	case MSR_KVM_SYSTEM_TIME: {
		/* release any previously registered clock page first */
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		/* bit 0 is the enable bit; without it nothing more is set up */
		if (!(data & 1))
			break;

		/* offset of the clock structure inside its page, enable bit masked off */
		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);

		vcpu->arch.time_page =
				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

		/* an unusable page is released again and the clock stays unmapped */
		if (is_error_page(vcpu->arch.time_page)) {
			kvm_release_page_clean(vcpu->arch.time_page);
			vcpu->arch.time_page = NULL;
		}
		break;
	}
	case MSR_KVM_ASYNC_PF_EN:
		if (kvm_pv_enable_async_pf(vcpu, data))
			return 1;
		break;
	case MSR_KVM_STEAL_TIME:

		/* steal time depends on scheduler statistics being available */
		if (unlikely(!sched_info_on()))
			return 1;

		if (data & KVM_STEAL_RESERVED_MASK)
			return 1;

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
							data & KVM_STEAL_VALID_BITS))
			return 1;

		vcpu->arch.st.msr_val = data;

		if (!(data & KVM_MSR_ENABLED))
			break;

		/* start measuring from the task's current run_delay */
		vcpu->arch.st.last_steal = current->sched_info.run_delay;

		preempt_disable();
		accumulate_steal_time(vcpu);
		preempt_enable();

		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

		break;

	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return set_msr_mce(vcpu, msr, data);

	/*
	 * Performance event-select registers: the write is accepted; a
	 * non-zero value is only logged as unimplemented.
	 */
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		if (data != 0)
			pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
				"0x%x data 0x%llx\n", msr, data);
		break;

	/* Performance counters: every write is accepted, logged and dropped. */
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
			"0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_K7_CLK_CTL:
		/* any value is silently accepted and discarded */
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		/* VM-wide Hyper-V MSRs are updated under kvm->lock */
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = set_msr_hyperv_pw(vcpu, msr, data);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return set_msr_hyperv(vcpu, msr, data);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* accepted and discarded, but logged */
		pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
		break;
	default:
		/* a non-zero MSR configured for Xen HVM takes precedence */
		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
			return xen_hvm_config(vcpu, data);
		/* unknown MSR: refuse it unless the ignore_msrs knob is set */
		if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				msr, data);
			return 1;
		} else {
			pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
				msr, data);
			break;
		}
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1719
1720
1721
1722
1723
1724
1725
1726int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1727{
1728 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1729}
1730
1731static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1732{
1733 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1734
1735 if (!msr_mtrr_valid(msr))
1736 return 1;
1737
1738 if (msr == MSR_MTRRdefType)
1739 *pdata = vcpu->arch.mtrr_state.def_type +
1740 (vcpu->arch.mtrr_state.enabled << 10);
1741 else if (msr == MSR_MTRRfix64K_00000)
1742 *pdata = p[0];
1743 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1744 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
1745 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1746 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
1747 else if (msr == MSR_IA32_CR_PAT)
1748 *pdata = vcpu->arch.pat;
1749 else {
1750 int idx, is_mtrr_mask;
1751 u64 *pt;
1752
1753 idx = (msr - 0x200) / 2;
1754 is_mtrr_mask = msr - 0x200 - 2 * idx;
1755 if (!is_mtrr_mask)
1756 pt =
1757 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1758 else
1759 pt =
1760 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1761 *pdata = *pt;
1762 }
1763
1764 return 0;
1765}
1766
1767static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1768{
1769 u64 data;
1770 u64 mcg_cap = vcpu->arch.mcg_cap;
1771 unsigned bank_num = mcg_cap & 0xff;
1772
1773 switch (msr) {
1774 case MSR_IA32_P5_MC_ADDR:
1775 case MSR_IA32_P5_MC_TYPE:
1776 data = 0;
1777 break;
1778 case MSR_IA32_MCG_CAP:
1779 data = vcpu->arch.mcg_cap;
1780 break;
1781 case MSR_IA32_MCG_CTL:
1782 if (!(mcg_cap & MCG_CTL_P))
1783 return 1;
1784 data = vcpu->arch.mcg_ctl;
1785 break;
1786 case MSR_IA32_MCG_STATUS:
1787 data = vcpu->arch.mcg_status;
1788 break;
1789 default:
1790 if (msr >= MSR_IA32_MC0_CTL &&
1791 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1792 u32 offset = msr - MSR_IA32_MC0_CTL;
1793 data = vcpu->arch.mce_banks[offset];
1794 break;
1795 }
1796 return 1;
1797 }
1798 *pdata = data;
1799 return 0;
1800}
1801
1802static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1803{
1804 u64 data = 0;
1805 struct kvm *kvm = vcpu->kvm;
1806
1807 switch (msr) {
1808 case HV_X64_MSR_GUEST_OS_ID:
1809 data = kvm->arch.hv_guest_os_id;
1810 break;
1811 case HV_X64_MSR_HYPERCALL:
1812 data = kvm->arch.hv_hypercall;
1813 break;
1814 default:
1815 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1816 return 1;
1817 }
1818
1819 *pdata = data;
1820 return 0;
1821}
1822
1823static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1824{
1825 u64 data = 0;
1826
1827 switch (msr) {
1828 case HV_X64_MSR_VP_INDEX: {
1829 int r;
1830 struct kvm_vcpu *v;
1831 kvm_for_each_vcpu(r, v, vcpu->kvm)
1832 if (v == vcpu)
1833 data = r;
1834 break;
1835 }
1836 case HV_X64_MSR_EOI:
1837 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1838 case HV_X64_MSR_ICR:
1839 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1840 case HV_X64_MSR_TPR:
1841 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1842 case HV_X64_MSR_APIC_ASSIST_PAGE:
1843 data = vcpu->arch.hv_vapic;
1844 break;
1845 default:
1846 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1847 return 1;
1848 }
1849 *pdata = data;
1850 return 0;
1851}
1852
/*
 * Vendor-independent handling of a read of MSR @msr.
 *
 * On success the value is stored in *@pdata and 0 is returned.  A non-zero
 * return means the read was refused.  Several groups of MSRs are delegated
 * to helpers whose return value is passed through unchanged.
 */
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;

	switch (msr) {
	/* the following MSRs all read as zero */
	case MSR_IA32_PLATFORM_ID:
	case MSR_IA32_EBL_CR_POWERON:
	case MSR_IA32_DEBUGCTLMSR:
	case MSR_IA32_LASTBRANCHFROMIP:
	case MSR_IA32_LASTBRANCHTOIP:
	case MSR_IA32_LASTINTFROMIP:
	case MSR_IA32_LASTINTTOIP:
	case MSR_K8_SYSCFG:
	case MSR_K7_HWCR:
	case MSR_VM_HSAVE_PA:
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
	case MSR_K7_EVNTSEL0:
	case MSR_K7_PERFCTR0:
	case MSR_K8_INT_PENDING_MSG:
	case MSR_AMD64_NB_CFG:
	case MSR_FAM10H_MMIO_CONF_BASE:
		data = 0;
		break;
	case MSR_IA32_UCODE_REV:
		/* fixed value: 1 in the upper 32 bits, 0 in the lower 32 bits */
		data = 0x100000000ULL;
		break;
	case MSR_MTRRcap:
		/* bits 8 and 10 set, low bits carry the number of variable MTRRs */
		data = 0x500 | KVM_NR_VAR_MTRR;
		break;
	case 0x200 ... 0x2ff:
		/* MTRR and PAT registers */
		return get_msr_mtrr(vcpu, msr, pdata);
	case 0xcd:
		/* NOTE(review): fixed value 3; the meaning of MSR 0xcd is not visible here */
		data = 3;
		break;
	case MSR_EBC_FREQUENCY_ID:
		/* fixed value with only bit 24 set */
		data = 1 << 24;
		break;
	case MSR_IA32_APICBASE:
		data = kvm_get_apic_base(vcpu);
		break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_read(vcpu, msr, pdata);
		break;
	case MSR_IA32_TSCDEADLINE:
		data = kvm_get_lapic_tscdeadline_msr(vcpu);
		break;
	case MSR_IA32_MISC_ENABLE:
		data = vcpu->arch.ia32_misc_enable_msr;
		break;
	case MSR_IA32_PERF_STATUS:
		/* fixed value: 1000 in the low bits ... */
		data = 1000ULL;
		/* ... combined with 4 placed at bit 40 */
		data |= (((uint64_t)4ULL) << 40);
		break;
	case MSR_EFER:
		data = vcpu->arch.efer;
		break;
	case MSR_KVM_WALL_CLOCK:
	case MSR_KVM_WALL_CLOCK_NEW:
		data = vcpu->kvm->arch.wall_clock;
		break;
	case MSR_KVM_SYSTEM_TIME:
	case MSR_KVM_SYSTEM_TIME_NEW:
		data = vcpu->arch.time;
		break;
	case MSR_KVM_ASYNC_PF_EN:
		data = vcpu->arch.apf.msr_val;
		break;
	case MSR_KVM_STEAL_TIME:
		data = vcpu->arch.st.msr_val;
		break;
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return get_msr_mce(vcpu, msr, pdata);
	case MSR_K7_CLK_CTL:
		/* fixed value; writes to this MSR are discarded by the set path */
		data = 0x20000000;
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		/* VM-wide Hyper-V MSRs are read under kvm->lock */
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = get_msr_hyperv_pw(vcpu, msr, pdata);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return get_msr_hyperv(vcpu, msr, pdata);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* fixed value; writes to this MSR are logged and discarded */
		data = 0xbe702111;
		break;
	default:
		/* unknown MSR: refuse it unless ignore_msrs is set, then read as 0 */
		if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
			return 1;
		} else {
			pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
			data = 0;
		}
		break;
	}
	*pdata = data;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1995
1996
1997
1998
1999
2000
2001static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2002 struct kvm_msr_entry *entries,
2003 int (*do_msr)(struct kvm_vcpu *vcpu,
2004 unsigned index, u64 *data))
2005{
2006 int i, idx;
2007
2008 idx = srcu_read_lock(&vcpu->kvm->srcu);
2009 for (i = 0; i < msrs->nmsrs; ++i)
2010 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2011 break;
2012 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2013
2014 return i;
2015}
2016
2017
2018
2019
2020
2021
2022static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2023 int (*do_msr)(struct kvm_vcpu *vcpu,
2024 unsigned index, u64 *data),
2025 int writeback)
2026{
2027 struct kvm_msrs msrs;
2028 struct kvm_msr_entry *entries;
2029 int r, n;
2030 unsigned size;
2031
2032 r = -EFAULT;
2033 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2034 goto out;
2035
2036 r = -E2BIG;
2037 if (msrs.nmsrs >= MAX_IO_MSRS)
2038 goto out;
2039
2040 r = -ENOMEM;
2041 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2042 entries = kmalloc(size, GFP_KERNEL);
2043 if (!entries)
2044 goto out;
2045
2046 r = -EFAULT;
2047 if (copy_from_user(entries, user_msrs->entries, size))
2048 goto out_free;
2049
2050 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2051 if (r < 0)
2052 goto out_free;
2053
2054 r = -EFAULT;
2055 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2056 goto out_free;
2057
2058 r = n;
2059
2060out_free:
2061 kfree(entries);
2062out:
2063 return r;
2064}
2065
2066int kvm_dev_ioctl_check_extension(long ext)
2067{
2068 int r;
2069
2070 switch (ext) {
2071 case KVM_CAP_IRQCHIP:
2072 case KVM_CAP_HLT:
2073 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2074 case KVM_CAP_SET_TSS_ADDR:
2075 case KVM_CAP_EXT_CPUID:
2076 case KVM_CAP_CLOCKSOURCE:
2077 case KVM_CAP_PIT:
2078 case KVM_CAP_NOP_IO_DELAY:
2079 case KVM_CAP_MP_STATE:
2080 case KVM_CAP_SYNC_MMU:
2081 case KVM_CAP_USER_NMI:
2082 case KVM_CAP_REINJECT_CONTROL:
2083 case KVM_CAP_IRQ_INJECT_STATUS:
2084 case KVM_CAP_ASSIGN_DEV_IRQ:
2085 case KVM_CAP_IRQFD:
2086 case KVM_CAP_IOEVENTFD:
2087 case KVM_CAP_PIT2:
2088 case KVM_CAP_PIT_STATE2:
2089 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2090 case KVM_CAP_XEN_HVM:
2091 case KVM_CAP_ADJUST_CLOCK:
2092 case KVM_CAP_VCPU_EVENTS:
2093 case KVM_CAP_HYPERV:
2094 case KVM_CAP_HYPERV_VAPIC:
2095 case KVM_CAP_HYPERV_SPIN:
2096 case KVM_CAP_PCI_SEGMENT:
2097 case KVM_CAP_DEBUGREGS:
2098 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2099 case KVM_CAP_XSAVE:
2100 case KVM_CAP_ASYNC_PF:
2101 case KVM_CAP_GET_TSC_KHZ:
2102 r = 1;
2103 break;
2104 case KVM_CAP_COALESCED_MMIO:
2105 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2106 break;
2107 case KVM_CAP_VAPIC:
2108 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2109 break;
2110 case KVM_CAP_NR_VCPUS:
2111 r = KVM_SOFT_MAX_VCPUS;
2112 break;
2113 case KVM_CAP_MAX_VCPUS:
2114 r = KVM_MAX_VCPUS;
2115 break;
2116 case KVM_CAP_NR_MEMSLOTS:
2117 r = KVM_MEMORY_SLOTS;
2118 break;
2119 case KVM_CAP_PV_MMU:
2120 r = 0;
2121 break;
2122 case KVM_CAP_IOMMU:
2123 r = iommu_present(&pci_bus_type);
2124 break;
2125 case KVM_CAP_MCE:
2126 r = KVM_MAX_MCE_BANKS;
2127 break;
2128 case KVM_CAP_XCRS:
2129 r = cpu_has_xsave;
2130 break;
2131 case KVM_CAP_TSC_CONTROL:
2132 r = kvm_has_tsc_control;
2133 break;
2134 case KVM_CAP_TSC_DEADLINE_TIMER:
2135 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2136 break;
2137 default:
2138 r = 0;
2139 break;
2140 }
2141 return r;
2142
2143}
2144
2145long kvm_arch_dev_ioctl(struct file *filp,
2146 unsigned int ioctl, unsigned long arg)
2147{
2148 void __user *argp = (void __user *)arg;
2149 long r;
2150
2151 switch (ioctl) {
2152 case KVM_GET_MSR_INDEX_LIST: {
2153 struct kvm_msr_list __user *user_msr_list = argp;
2154 struct kvm_msr_list msr_list;
2155 unsigned n;
2156
2157 r = -EFAULT;
2158 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2159 goto out;
2160 n = msr_list.nmsrs;
2161 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2162 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2163 goto out;
2164 r = -E2BIG;
2165 if (n < msr_list.nmsrs)
2166 goto out;
2167 r = -EFAULT;
2168 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2169 num_msrs_to_save * sizeof(u32)))
2170 goto out;
2171 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2172 &emulated_msrs,
2173 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2174 goto out;
2175 r = 0;
2176 break;
2177 }
2178 case KVM_GET_SUPPORTED_CPUID: {
2179 struct kvm_cpuid2 __user *cpuid_arg = argp;
2180 struct kvm_cpuid2 cpuid;
2181
2182 r = -EFAULT;
2183 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2184 goto out;
2185 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2186 cpuid_arg->entries);
2187 if (r)
2188 goto out;
2189
2190 r = -EFAULT;
2191 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2192 goto out;
2193 r = 0;
2194 break;
2195 }
2196 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2197 u64 mce_cap;
2198
2199 mce_cap = KVM_MCE_CAP_SUPPORTED;
2200 r = -EFAULT;
2201 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2202 goto out;
2203 r = 0;
2204 break;
2205 }
2206 default:
2207 r = -EINVAL;
2208 }
2209out:
2210 return r;
2211}
2212
/*
 * Cross-CPU callback that executes WBINVD on the CPU it runs on.  The
 * argument is ignored.
 */
static void wbinvd_ipi(void *unused)
{
	wbinvd();
}
2217
2218static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2219{
2220 return vcpu->kvm->arch.iommu_domain &&
2221 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2222}
2223
/*
 * Prepare @vcpu for running on physical CPU @cpu.
 *
 * Besides calling the vendor vcpu_load hook this handles WBINVD
 * bookkeeping, compensates the guest TSC when the vCPU changed CPUs or the
 * host TSC is unstable, and samples steal time.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/*
	 * When WBINVD must be emulated: either remember that this CPU may
	 * hold dirty lines (if WBINVD exits are available), or flush the
	 * caches of the CPU the vCPU is leaving.
	 */
	if (need_emulate_wbinvd(vcpu)) {
		if (kvm_x86_ops->has_wbinvd_exit())
			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
			smp_call_function_single(vcpu->cpu,
					wbinvd_ipi, NULL, 1);
	}

	kvm_x86_ops->vcpu_load(vcpu, cpu);
	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
		/* compare the current guest TSC with the value saved at vcpu_put */
		s64 tsc_delta;
		u64 tsc;

		tsc = kvm_x86_ops->read_l1_tsc(vcpu);
		/* no saved value yet (last_guest_tsc == 0) means no delta */
		tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
			     tsc - vcpu->arch.last_guest_tsc;

		if (tsc_delta < 0)
			mark_tsc_unstable("KVM discovered backwards TSC");
		if (check_tsc_unstable()) {
			/* cancel the observed jump and request catch-up mode */
			kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
			vcpu->arch.tsc_catchup = 1;
		}
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		if (vcpu->cpu != cpu)
			kvm_migrate_timers(vcpu);
		vcpu->cpu = cpu;
	}

	accumulate_steal_time(vcpu);
	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
}
2260
/*
 * Called when @vcpu stops running on its current physical CPU: runs the
 * vendor vcpu_put hook, releases the guest FPU state and records the guest
 * TSC so that kvm_arch_vcpu_load() can compare against it later.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
	kvm_put_guest_fpu(vcpu);
	/* saved last so it reflects the TSC at the very end of the put */
	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
}
2267
2268static int is_efer_nx(void)
2269{
2270 unsigned long long efer = 0;
2271
2272 rdmsrl_safe(MSR_EFER, &efer);
2273 return efer & EFER_NX;
2274}
2275
2276static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
2277{
2278 int i;
2279 struct kvm_cpuid_entry2 *e, *entry;
2280
2281 entry = NULL;
2282 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
2283 e = &vcpu->arch.cpuid_entries[i];
2284 if (e->function == 0x80000001) {
2285 entry = e;
2286 break;
2287 }
2288 }
2289 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
2290 entry->edx &= ~(1 << 20);
2291 printk(KERN_INFO "kvm: guest NX capability removed\n");
2292 }
2293}
2294
2295
2296static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2297 struct kvm_cpuid *cpuid,
2298 struct kvm_cpuid_entry __user *entries)
2299{
2300 int r, i;
2301 struct kvm_cpuid_entry *cpuid_entries;
2302
2303 r = -E2BIG;
2304 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2305 goto out;
2306 r = -ENOMEM;
2307 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
2308 if (!cpuid_entries)
2309 goto out;
2310 r = -EFAULT;
2311 if (copy_from_user(cpuid_entries, entries,
2312 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
2313 goto out_free;
2314 for (i = 0; i < cpuid->nent; i++) {
2315 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
2316 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
2317 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
2318 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
2319 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
2320 vcpu->arch.cpuid_entries[i].index = 0;
2321 vcpu->arch.cpuid_entries[i].flags = 0;
2322 vcpu->arch.cpuid_entries[i].padding[0] = 0;
2323 vcpu->arch.cpuid_entries[i].padding[1] = 0;
2324 vcpu->arch.cpuid_entries[i].padding[2] = 0;
2325 }
2326 vcpu->arch.cpuid_nent = cpuid->nent;
2327 cpuid_fix_nx_cap(vcpu);
2328 r = 0;
2329 kvm_apic_set_version(vcpu);
2330 kvm_x86_ops->cpuid_update(vcpu);
2331 update_cpuid(vcpu);
2332
2333out_free:
2334 vfree(cpuid_entries);
2335out:
2336 return r;
2337}
2338
2339static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
2340 struct kvm_cpuid2 *cpuid,
2341 struct kvm_cpuid_entry2 __user *entries)
2342{
2343 int r;
2344
2345 r = -E2BIG;
2346 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2347 goto out;
2348 r = -EFAULT;
2349 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
2350 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
2351 goto out;
2352 vcpu->arch.cpuid_nent = cpuid->nent;
2353 kvm_apic_set_version(vcpu);
2354 kvm_x86_ops->cpuid_update(vcpu);
2355 update_cpuid(vcpu);
2356 return 0;
2357
2358out:
2359 return r;
2360}
2361
2362static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
2363 struct kvm_cpuid2 *cpuid,
2364 struct kvm_cpuid_entry2 __user *entries)
2365{
2366 int r;
2367
2368 r = -E2BIG;
2369 if (cpuid->nent < vcpu->arch.cpuid_nent)
2370 goto out;
2371 r = -EFAULT;
2372 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
2373 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
2374 goto out;
2375 return 0;
2376
2377out:
2378 cpuid->nent = vcpu->arch.cpuid_nent;
2379 return r;
2380}
2381
2382static void cpuid_mask(u32 *word, int wordnum)
2383{
2384 *word &= boot_cpu_data.x86_capability[wordnum];
2385}
2386
2387static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2388 u32 index)
2389{
2390 entry->function = function;
2391 entry->index = index;
2392 cpuid_count(entry->function, entry->index,
2393 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
2394 entry->flags = 0;
2395}
2396
2397static bool supported_xcr0_bit(unsigned bit)
2398{
2399 u64 mask = ((u64)1 << bit);
2400
2401 return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
2402}
2403
/* Shorthand for the bit mask of X86_FEATURE_<x>; undefined again below. */
#define F(x) bit(X86_FEATURE_##x)

/*
 * Build the supported-CPUID entry (or entries) for leaf @function,
 * sub-leaf @index.
 *
 * @entry points at the next free slot of the caller's array; leaves with
 * several sub-leaves fill consecutive slots entry[1], entry[2], ...
 * *@nent is incremented for every slot filled and is checked against
 * @maxnent before each additional slot is used.
 *
 * The raw result is masked down to what is listed in the per-word feature
 * masks below and, via cpuid_mask(), to boot_cpu_data's capability words.
 * The vendor set_supported_cpuid hook gets the final say on the first
 * entry.
 */
static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			 u32 index, int *nent, int maxnent)
{
	/* features that depend on host state or on the build configuration */
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;

	/*
	 * In the masks below a literal 0 term contributes nothing; presumably
	 * it marks a bit position that is deliberately not exposed.
	 */

	/* applied to leaf 1 EDX */
	const u32 kvm_supported_word0_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 | F(CLFLSH) |
		0 | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 ;

	/* applied to leaf 0x80000001 EDX */
	const u32 kvm_supported_word1_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 |
		f_nx | 0 | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 | f_lm | F(3DNOWEXT) | F(3DNOW);

	/* applied to leaf 1 ECX */
	const u32 kvm_supported_word4_x86_features =
		F(XMM3) | F(PCLMULQDQ) | 0 |
		0 |
		0 | F(SSSE3) | 0 | 0 |
		0 | F(CX16) | 0 |
		0 | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 | F(AES) | F(XSAVE) | 0 | F(AVX) |
		F(F16C) | F(RDRAND);

	/* applied to leaf 0x80000001 ECX */
	const u32 kvm_supported_word6_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 | 0 |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | 0 | 0 | F(XOP) |
		0 | F(FMA4) | F(TBM);

	/* applied to leaf 0xC0000001 EDX */
	const u32 kvm_supported_word5_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* applied to leaf 7, sub-leaf 0 EBX */
	const u32 kvm_supported_word9_x86_features =
		F(SMEP) | F(FSGSBASE) | F(ERMS);

	/*
	 * get_cpu()/put_cpu() bracket every CPUID query made below, presumably
	 * so that all of them are answered by the same physical CPU.
	 */
	get_cpu();
	do_cpuid_1_ent(entry, function, index);
	++*nent;

	switch (function) {
	case 0:
		/* cap the highest reported standard leaf at 0xd */
		entry->eax = min(entry->eax, (u32)0xd);
		break;
	case 1:
		entry->edx &= kvm_supported_word0_x86_features;
		cpuid_mask(&entry->edx, 0);
		entry->ecx &= kvm_supported_word4_x86_features;
		cpuid_mask(&entry->ecx, 4);
		/* X2APIC is set after masking, so it is reported regardless of the host */
		entry->ecx |= F(X2APIC);
		break;
	/*
	 * Leaf 2 is flagged as stateful: the low byte of EAX of the first
	 * result gives how many reads make up the full answer, and one entry
	 * is recorded per read.
	 */
	case 2: {
		int t, times = entry->eax & 0xff;

		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
		for (t = 1; t < times && *nent < maxnent; ++t) {
			do_cpuid_1_ent(&entry[t], function, 0);
			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
			++*nent;
		}
		break;
	}
	/* leaf 4: enumerate sub-leaves until one reports type 0 in EAX[4:0] */
	case 4: {
		int i, cache_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until cache_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			cache_type = entry[i - 1].eax & 0x1f;
			if (!cache_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 7: {
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* only sub-leaf 0 EBX carries features; everything else is zeroed */
		if (index == 0) {
			entry->ebx &= kvm_supported_word9_x86_features;
			cpuid_mask(&entry->ebx, 9);
		} else
			entry->ebx = 0;
		entry->eax = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
	case 9:
		/* passed through unmodified */
		break;
	/* leaf 0xb: enumerate sub-leaves until one reports 0 in ECX[15:8] */
	case 0xb: {
		int i, level_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until level_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			level_type = entry[i - 1].ecx & 0xff00;
			if (!level_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	/*
	 * Leaf 0xd: probe sub-leaves 1..63 and keep only those that report a
	 * non-zero EAX and pass supported_xcr0_bit().  A rejected probe
	 * leaves entry[i] to be overwritten by the next one.
	 */
	case 0xd: {
		int idx, i;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) {
			do_cpuid_1_ent(&entry[i], function, idx);
			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
				continue;
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
			++i;
		}
		break;
	}
	case KVM_CPUID_SIGNATURE: {
		/* 12-byte signature returned in EBX, ECX, EDX (4 bytes each) */
		char signature[12] = "KVMKVMKVM\0\0";
		u32 *sigptr = (u32 *)signature;
		entry->eax = 0;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);

		/* steal time is only offered when scheduler statistics are on */
		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		/* cap the highest reported extended leaf at 0x8000001a */
		entry->eax = min(entry->eax, 0x8000001a);
		break;
	case 0x80000001:
		entry->edx &= kvm_supported_word1_x86_features;
		cpuid_mask(&entry->edx, 1);
		entry->ecx &= kvm_supported_word6_x86_features;
		cpuid_mask(&entry->ecx, 6);
		break;
	case 0x80000008: {
		/*
		 * EAX is rebuilt from its byte fields: bits 23:16 replace the
		 * low byte when non-zero (otherwise the low byte is kept), and
		 * bits 15:8 are raised to at least 48.
		 */
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->ebx = entry->edx = 0;
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
		/* passed through unmodified */
		break;
	case 0x8000001d:
		/* passed through unmodified */
		break;
	case 0xC0000000:
		/* cap the highest reported leaf of the 0xC0000000 range at 0xC0000004 */
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_supported_word5_x86_features;
		cpuid_mask(&entry->edx, 5);
		break;
	/* the leaves listed here, and any leaf not handled above, read as all zeroes */
	case 3:
	case 5:
	case 6:
	case 0xA:
	case 0x80000007:
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	/* let the vendor module adjust the (first) entry */
	kvm_x86_ops->set_supported_cpuid(function, entry);

	put_cpu();
}

#undef F
2643
2644static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
2645 struct kvm_cpuid_entry2 __user *entries)
2646{
2647 struct kvm_cpuid_entry2 *cpuid_entries;
2648 int limit, nent = 0, r = -E2BIG;
2649 u32 func;
2650
2651 if (cpuid->nent < 1)
2652 goto out;
2653 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2654 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
2655 r = -ENOMEM;
2656 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
2657 if (!cpuid_entries)
2658 goto out;
2659
2660 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
2661 limit = cpuid_entries[0].eax;
2662 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
2663 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2664 &nent, cpuid->nent);
2665 r = -E2BIG;
2666 if (nent >= cpuid->nent)
2667 goto out_free;
2668
2669 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
2670 limit = cpuid_entries[nent - 1].eax;
2671 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
2672 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2673 &nent, cpuid->nent);
2674
2675
2676
2677 r = -E2BIG;
2678 if (nent >= cpuid->nent)
2679 goto out_free;
2680
2681
2682 if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
2683 do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
2684 &nent, cpuid->nent);
2685
2686 r = -E2BIG;
2687 if (nent >= cpuid->nent)
2688 goto out_free;
2689
2690 limit = cpuid_entries[nent - 1].eax;
2691 for (func = 0xC0000001;
2692 func <= limit && nent < cpuid->nent; ++func)
2693 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2694 &nent, cpuid->nent);
2695
2696 r = -E2BIG;
2697 if (nent >= cpuid->nent)
2698 goto out_free;
2699 }
2700
2701 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2702 cpuid->nent);
2703
2704 r = -E2BIG;
2705 if (nent >= cpuid->nent)
2706 goto out_free;
2707
2708 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2709 cpuid->nent);
2710
2711 r = -E2BIG;
2712 if (nent >= cpuid->nent)
2713 goto out_free;
2714
2715 r = -EFAULT;
2716 if (copy_to_user(entries, cpuid_entries,
2717 nent * sizeof(struct kvm_cpuid_entry2)))
2718 goto out_free;
2719 cpuid->nent = nent;
2720 r = 0;
2721
2722out_free:
2723 vfree(cpuid_entries);
2724out:
2725 return r;
2726}
2727
2728static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2729 struct kvm_lapic_state *s)
2730{
2731 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2732
2733 return 0;
2734}
2735
2736static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2737 struct kvm_lapic_state *s)
2738{
2739 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2740 kvm_apic_post_state_restore(vcpu);
2741 update_cr8_intercept(vcpu);
2742
2743 return 0;
2744}
2745
2746static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2747 struct kvm_interrupt *irq)
2748{
2749 if (irq->irq < 0 || irq->irq >= 256)
2750 return -EINVAL;
2751 if (irqchip_in_kernel(vcpu->kvm))
2752 return -ENXIO;
2753
2754 kvm_queue_interrupt(vcpu, irq->irq, false);
2755 kvm_make_request(KVM_REQ_EVENT, vcpu);
2756
2757 return 0;
2758}
2759
/* Inject an NMI into @vcpu via kvm_inject_nmi(); always returns 0. */
static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
{
	kvm_inject_nmi(vcpu);
	return 0;
}
2766
2767static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2768 struct kvm_tpr_access_ctl *tac)
2769{
2770 if (tac->flags)
2771 return -EINVAL;
2772 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2773 return 0;
2774}
2775
2776static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2777 u64 mcg_cap)
2778{
2779 int r;
2780 unsigned bank_num = mcg_cap & 0xff, bank;
2781
2782 r = -EINVAL;
2783 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2784 goto out;
2785 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2786 goto out;
2787 r = 0;
2788 vcpu->arch.mcg_cap = mcg_cap;
2789
2790 if (mcg_cap & MCG_CTL_P)
2791 vcpu->arch.mcg_ctl = ~(u64)0;
2792
2793 for (bank = 0; bank < bank_num; bank++)
2794 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2795out:
2796 return r;
2797}
2798
2799static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2800 struct kvm_x86_mce *mce)
2801{
2802 u64 mcg_cap = vcpu->arch.mcg_cap;
2803 unsigned bank_num = mcg_cap & 0xff;
2804 u64 *banks = vcpu->arch.mce_banks;
2805
2806 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2807 return -EINVAL;
2808
2809
2810
2811
2812 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2813 vcpu->arch.mcg_ctl != ~(u64)0)
2814 return 0;
2815 banks += 4 * mce->bank;
2816
2817
2818
2819
2820 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2821 return 0;
2822 if (mce->status & MCI_STATUS_UC) {
2823 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2824 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2825 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2826 return 0;
2827 }
2828 if (banks[1] & MCI_STATUS_VAL)
2829 mce->status |= MCI_STATUS_OVER;
2830 banks[2] = mce->addr;
2831 banks[3] = mce->misc;
2832 vcpu->arch.mcg_status = mce->mcg_status;
2833 banks[1] = mce->status;
2834 kvm_queue_exception(vcpu, MC_VECTOR);
2835 } else if (!(banks[1] & MCI_STATUS_VAL)
2836 || !(banks[1] & MCI_STATUS_UC)) {
2837 if (banks[1] & MCI_STATUS_VAL)
2838 mce->status |= MCI_STATUS_OVER;
2839 banks[2] = mce->addr;
2840 banks[3] = mce->misc;
2841 banks[1] = mce->status;
2842 } else
2843 banks[1] |= MCI_STATUS_OVER;
2844 return 0;
2845}
2846
2847static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2848 struct kvm_vcpu_events *events)
2849{
2850 process_nmi(vcpu);
2851 events->exception.injected =
2852 vcpu->arch.exception.pending &&
2853 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2854 events->exception.nr = vcpu->arch.exception.nr;
2855 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2856 events->exception.pad = 0;
2857 events->exception.error_code = vcpu->arch.exception.error_code;
2858
2859 events->interrupt.injected =
2860 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2861 events->interrupt.nr = vcpu->arch.interrupt.nr;
2862 events->interrupt.soft = 0;
2863 events->interrupt.shadow =
2864 kvm_x86_ops->get_interrupt_shadow(vcpu,
2865 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2866
2867 events->nmi.injected = vcpu->arch.nmi_injected;
2868 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2869 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2870 events->nmi.pad = 0;
2871
2872 events->sipi_vector = vcpu->arch.sipi_vector;
2873
2874 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2875 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2876 | KVM_VCPUEVENT_VALID_SHADOW);
2877 memset(&events->reserved, 0, sizeof(events->reserved));
2878}
2879
2880static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2881 struct kvm_vcpu_events *events)
2882{
2883 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2884 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2885 | KVM_VCPUEVENT_VALID_SHADOW))
2886 return -EINVAL;
2887
2888 process_nmi(vcpu);
2889 vcpu->arch.exception.pending = events->exception.injected;
2890 vcpu->arch.exception.nr = events->exception.nr;
2891 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2892 vcpu->arch.exception.error_code = events->exception.error_code;
2893
2894 vcpu->arch.interrupt.pending = events->interrupt.injected;
2895 vcpu->arch.interrupt.nr = events->interrupt.nr;
2896 vcpu->arch.interrupt.soft = events->interrupt.soft;
2897 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2898 kvm_x86_ops->set_interrupt_shadow(vcpu,
2899 events->interrupt.shadow);
2900
2901 vcpu->arch.nmi_injected = events->nmi.injected;
2902 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2903 vcpu->arch.nmi_pending = events->nmi.pending;
2904 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2905
2906 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2907 vcpu->arch.sipi_vector = events->sipi_vector;
2908
2909 kvm_make_request(KVM_REQ_EVENT, vcpu);
2910
2911 return 0;
2912}
2913
2914static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2915 struct kvm_debugregs *dbgregs)
2916{
2917 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2918 dbgregs->dr6 = vcpu->arch.dr6;
2919 dbgregs->dr7 = vcpu->arch.dr7;
2920 dbgregs->flags = 0;
2921 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2922}
2923
2924static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2925 struct kvm_debugregs *dbgregs)
2926{
2927 if (dbgregs->flags)
2928 return -EINVAL;
2929
2930 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2931 vcpu->arch.dr6 = dbgregs->dr6;
2932 vcpu->arch.dr7 = dbgregs->dr7;
2933
2934 return 0;
2935}
2936
2937static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2938 struct kvm_xsave *guest_xsave)
2939{
2940 if (cpu_has_xsave)
2941 memcpy(guest_xsave->region,
2942 &vcpu->arch.guest_fpu.state->xsave,
2943 xstate_size);
2944 else {
2945 memcpy(guest_xsave->region,
2946 &vcpu->arch.guest_fpu.state->fxsave,
2947 sizeof(struct i387_fxsave_struct));
2948 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2949 XSTATE_FPSSE;
2950 }
2951}
2952
2953static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2954 struct kvm_xsave *guest_xsave)
2955{
2956 u64 xstate_bv =
2957 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2958
2959 if (cpu_has_xsave)
2960 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2961 guest_xsave->region, xstate_size);
2962 else {
2963 if (xstate_bv & ~XSTATE_FPSSE)
2964 return -EINVAL;
2965 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2966 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2967 }
2968 return 0;
2969}
2970
2971static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2972 struct kvm_xcrs *guest_xcrs)
2973{
2974 if (!cpu_has_xsave) {
2975 guest_xcrs->nr_xcrs = 0;
2976 return;
2977 }
2978
2979 guest_xcrs->nr_xcrs = 1;
2980 guest_xcrs->flags = 0;
2981 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2982 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2983}
2984
2985static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2986 struct kvm_xcrs *guest_xcrs)
2987{
2988 int i, r = 0;
2989
2990 if (!cpu_has_xsave)
2991 return -EINVAL;
2992
2993 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2994 return -EINVAL;
2995
2996 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
2997
2998 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
2999 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3000 guest_xcrs->xcrs[0].value);
3001 break;
3002 }
3003 if (r)
3004 r = -EINVAL;
3005 return r;
3006}
3007
3008long kvm_arch_vcpu_ioctl(struct file *filp,
3009 unsigned int ioctl, unsigned long arg)
3010{
3011 struct kvm_vcpu *vcpu = filp->private_data;
3012 void __user *argp = (void __user *)arg;
3013 int r;
3014 union {
3015 struct kvm_lapic_state *lapic;
3016 struct kvm_xsave *xsave;
3017 struct kvm_xcrs *xcrs;
3018 void *buffer;
3019 } u;
3020
3021 u.buffer = NULL;
3022 switch (ioctl) {
3023 case KVM_GET_LAPIC: {
3024 r = -EINVAL;
3025 if (!vcpu->arch.apic)
3026 goto out;
3027 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3028
3029 r = -ENOMEM;
3030 if (!u.lapic)
3031 goto out;
3032 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3033 if (r)
3034 goto out;
3035 r = -EFAULT;
3036 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3037 goto out;
3038 r = 0;
3039 break;
3040 }
3041 case KVM_SET_LAPIC: {
3042 r = -EINVAL;
3043 if (!vcpu->arch.apic)
3044 goto out;
3045 u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3046 r = -ENOMEM;
3047 if (!u.lapic)
3048 goto out;
3049 r = -EFAULT;
3050 if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
3051 goto out;
3052 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3053 if (r)
3054 goto out;
3055 r = 0;
3056 break;
3057 }
3058 case KVM_INTERRUPT: {
3059 struct kvm_interrupt irq;
3060
3061 r = -EFAULT;
3062 if (copy_from_user(&irq, argp, sizeof irq))
3063 goto out;
3064 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3065 if (r)
3066 goto out;
3067 r = 0;
3068 break;
3069 }
3070 case KVM_NMI: {
3071 r = kvm_vcpu_ioctl_nmi(vcpu);
3072 if (r)
3073 goto out;
3074 r = 0;
3075 break;
3076 }
3077 case KVM_SET_CPUID: {
3078 struct kvm_cpuid __user *cpuid_arg = argp;
3079 struct kvm_cpuid cpuid;
3080
3081 r = -EFAULT;
3082 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3083 goto out;
3084 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3085 if (r)
3086 goto out;
3087 break;
3088 }
3089 case KVM_SET_CPUID2: {
3090 struct kvm_cpuid2 __user *cpuid_arg = argp;
3091 struct kvm_cpuid2 cpuid;
3092
3093 r = -EFAULT;
3094 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3095 goto out;
3096 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3097 cpuid_arg->entries);
3098 if (r)
3099 goto out;
3100 break;
3101 }
3102 case KVM_GET_CPUID2: {
3103 struct kvm_cpuid2 __user *cpuid_arg = argp;
3104 struct kvm_cpuid2 cpuid;
3105
3106 r = -EFAULT;
3107 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3108 goto out;
3109 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3110 cpuid_arg->entries);
3111 if (r)
3112 goto out;
3113 r = -EFAULT;
3114 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3115 goto out;
3116 r = 0;
3117 break;
3118 }
3119 case KVM_GET_MSRS:
3120 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3121 break;
3122 case KVM_SET_MSRS:
3123 r = msr_io(vcpu, argp, do_set_msr, 0);
3124 break;
3125 case KVM_TPR_ACCESS_REPORTING: {
3126 struct kvm_tpr_access_ctl tac;
3127
3128 r = -EFAULT;
3129 if (copy_from_user(&tac, argp, sizeof tac))
3130 goto out;
3131 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3132 if (r)
3133 goto out;
3134 r = -EFAULT;
3135 if (copy_to_user(argp, &tac, sizeof tac))
3136 goto out;
3137 r = 0;
3138 break;
3139 };
3140 case KVM_SET_VAPIC_ADDR: {
3141 struct kvm_vapic_addr va;
3142
3143 r = -EINVAL;
3144 if (!irqchip_in_kernel(vcpu->kvm))
3145 goto out;
3146 r = -EFAULT;
3147 if (copy_from_user(&va, argp, sizeof va))
3148 goto out;
3149 r = 0;
3150 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3151 break;
3152 }
3153 case KVM_X86_SETUP_MCE: {
3154 u64 mcg_cap;
3155
3156 r = -EFAULT;
3157 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3158 goto out;
3159 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3160 break;
3161 }
3162 case KVM_X86_SET_MCE: {
3163 struct kvm_x86_mce mce;
3164
3165 r = -EFAULT;
3166 if (copy_from_user(&mce, argp, sizeof mce))
3167 goto out;
3168 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3169 break;
3170 }
3171 case KVM_GET_VCPU_EVENTS: {
3172 struct kvm_vcpu_events events;
3173
3174 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3175
3176 r = -EFAULT;
3177 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3178 break;
3179 r = 0;
3180 break;
3181 }
3182 case KVM_SET_VCPU_EVENTS: {
3183 struct kvm_vcpu_events events;
3184
3185 r = -EFAULT;
3186 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3187 break;
3188
3189 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3190 break;
3191 }
3192 case KVM_GET_DEBUGREGS: {
3193 struct kvm_debugregs dbgregs;
3194
3195 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3196
3197 r = -EFAULT;
3198 if (copy_to_user(argp, &dbgregs,
3199 sizeof(struct kvm_debugregs)))
3200 break;
3201 r = 0;
3202 break;
3203 }
3204 case KVM_SET_DEBUGREGS: {
3205 struct kvm_debugregs dbgregs;
3206
3207 r = -EFAULT;
3208 if (copy_from_user(&dbgregs, argp,
3209 sizeof(struct kvm_debugregs)))
3210 break;
3211
3212 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3213 break;
3214 }
3215 case KVM_GET_XSAVE: {
3216 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3217 r = -ENOMEM;
3218 if (!u.xsave)
3219 break;
3220
3221 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3222
3223 r = -EFAULT;
3224 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3225 break;
3226 r = 0;
3227 break;
3228 }
3229 case KVM_SET_XSAVE: {
3230 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3231 r = -ENOMEM;
3232 if (!u.xsave)
3233 break;
3234
3235 r = -EFAULT;
3236 if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
3237 break;
3238
3239 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3240 break;
3241 }
3242 case KVM_GET_XCRS: {
3243 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3244 r = -ENOMEM;
3245 if (!u.xcrs)
3246 break;
3247
3248 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3249
3250 r = -EFAULT;
3251 if (copy_to_user(argp, u.xcrs,
3252 sizeof(struct kvm_xcrs)))
3253 break;
3254 r = 0;
3255 break;
3256 }
3257 case KVM_SET_XCRS: {
3258 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3259 r = -ENOMEM;
3260 if (!u.xcrs)
3261 break;
3262
3263 r = -EFAULT;
3264 if (copy_from_user(u.xcrs, argp,
3265 sizeof(struct kvm_xcrs)))
3266 break;
3267
3268 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3269 break;
3270 }
3271 case KVM_SET_TSC_KHZ: {
3272 u32 user_tsc_khz;
3273
3274 r = -EINVAL;
3275 if (!kvm_has_tsc_control)
3276 break;
3277
3278 user_tsc_khz = (u32)arg;
3279
3280 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3281 goto out;
3282
3283 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
3284
3285 r = 0;
3286 goto out;
3287 }
3288 case KVM_GET_TSC_KHZ: {
3289 r = -EIO;
3290 if (check_tsc_unstable())
3291 goto out;
3292
3293 r = vcpu_tsc_khz(vcpu);
3294
3295 goto out;
3296 }
3297 default:
3298 r = -EINVAL;
3299 }
3300out:
3301 kfree(u.buffer);
3302 return r;
3303}
3304
3305static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3306{
3307 int ret;
3308
3309 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3310 return -1;
3311 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3312 return ret;
3313}
3314
3315static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3316 u64 ident_addr)
3317{
3318 kvm->arch.ept_identity_map_addr = ident_addr;
3319 return 0;
3320}
3321
3322static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3323 u32 kvm_nr_mmu_pages)
3324{
3325 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3326 return -EINVAL;
3327
3328 mutex_lock(&kvm->slots_lock);
3329 spin_lock(&kvm->mmu_lock);
3330
3331 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3332 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3333
3334 spin_unlock(&kvm->mmu_lock);
3335 mutex_unlock(&kvm->slots_lock);
3336 return 0;
3337}
3338
3339static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3340{
3341 return kvm->arch.n_max_mmu_pages;
3342}
3343
3344static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3345{
3346 int r;
3347
3348 r = 0;
3349 switch (chip->chip_id) {
3350 case KVM_IRQCHIP_PIC_MASTER:
3351 memcpy(&chip->chip.pic,
3352 &pic_irqchip(kvm)->pics[0],
3353 sizeof(struct kvm_pic_state));
3354 break;
3355 case KVM_IRQCHIP_PIC_SLAVE:
3356 memcpy(&chip->chip.pic,
3357 &pic_irqchip(kvm)->pics[1],
3358 sizeof(struct kvm_pic_state));
3359 break;
3360 case KVM_IRQCHIP_IOAPIC:
3361 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3362 break;
3363 default:
3364 r = -EINVAL;
3365 break;
3366 }
3367 return r;
3368}
3369
3370static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3371{
3372 int r;
3373
3374 r = 0;
3375 switch (chip->chip_id) {
3376 case KVM_IRQCHIP_PIC_MASTER:
3377 spin_lock(&pic_irqchip(kvm)->lock);
3378 memcpy(&pic_irqchip(kvm)->pics[0],
3379 &chip->chip.pic,
3380 sizeof(struct kvm_pic_state));
3381 spin_unlock(&pic_irqchip(kvm)->lock);
3382 break;
3383 case KVM_IRQCHIP_PIC_SLAVE:
3384 spin_lock(&pic_irqchip(kvm)->lock);
3385 memcpy(&pic_irqchip(kvm)->pics[1],
3386 &chip->chip.pic,
3387 sizeof(struct kvm_pic_state));
3388 spin_unlock(&pic_irqchip(kvm)->lock);
3389 break;
3390 case KVM_IRQCHIP_IOAPIC:
3391 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3392 break;
3393 default:
3394 r = -EINVAL;
3395 break;
3396 }
3397 kvm_pic_update_irq(pic_irqchip(kvm));
3398 return r;
3399}
3400
3401static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3402{
3403 int r = 0;
3404
3405 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3406 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3407 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3408 return r;
3409}
3410
3411static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3412{
3413 int r = 0;
3414
3415 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3416 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3417 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3418 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3419 return r;
3420}
3421
3422static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3423{
3424 int r = 0;
3425
3426 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3427 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3428 sizeof(ps->channels));
3429 ps->flags = kvm->arch.vpit->pit_state.flags;
3430 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3431 memset(&ps->reserved, 0, sizeof(ps->reserved));
3432 return r;
3433}
3434
3435static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3436{
3437 int r = 0, start = 0;
3438 u32 prev_legacy, cur_legacy;
3439 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3440 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3441 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3442 if (!prev_legacy && cur_legacy)
3443 start = 1;
3444 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3445 sizeof(kvm->arch.vpit->pit_state.channels));
3446 kvm->arch.vpit->pit_state.flags = ps->flags;
3447 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3448 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3449 return r;
3450}
3451
3452static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3453 struct kvm_reinject_control *control)
3454{
3455 if (!kvm->arch.vpit)
3456 return -ENXIO;
3457 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3458 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
3459 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3460 return 0;
3461}
3462
3463
3464
3465
3466int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3467 struct kvm_dirty_log *log)
3468{
3469 int r, i;
3470 struct kvm_memory_slot *memslot;
3471 unsigned long n;
3472 unsigned long is_dirty = 0;
3473
3474 mutex_lock(&kvm->slots_lock);
3475
3476 r = -EINVAL;
3477 if (log->slot >= KVM_MEMORY_SLOTS)
3478 goto out;
3479
3480 memslot = &kvm->memslots->memslots[log->slot];
3481 r = -ENOENT;
3482 if (!memslot->dirty_bitmap)
3483 goto out;
3484
3485 n = kvm_dirty_bitmap_bytes(memslot);
3486
3487 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
3488 is_dirty = memslot->dirty_bitmap[i];
3489
3490
3491 if (is_dirty) {
3492 struct kvm_memslots *slots, *old_slots;
3493 unsigned long *dirty_bitmap;
3494
3495 dirty_bitmap = memslot->dirty_bitmap_head;
3496 if (memslot->dirty_bitmap == dirty_bitmap)
3497 dirty_bitmap += n / sizeof(long);
3498 memset(dirty_bitmap, 0, n);
3499
3500 r = -ENOMEM;
3501 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
3502 if (!slots)
3503 goto out;
3504 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
3505 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
3506 slots->generation++;
3507
3508 old_slots = kvm->memslots;
3509 rcu_assign_pointer(kvm->memslots, slots);
3510 synchronize_srcu_expedited(&kvm->srcu);
3511 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
3512 kfree(old_slots);
3513
3514 spin_lock(&kvm->mmu_lock);
3515 kvm_mmu_slot_remove_write_access(kvm, log->slot);
3516 spin_unlock(&kvm->mmu_lock);
3517
3518 r = -EFAULT;
3519 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
3520 goto out;
3521 } else {
3522 r = -EFAULT;
3523 if (clear_user(log->dirty_bitmap, n))
3524 goto out;
3525 }
3526
3527 r = 0;
3528out:
3529 mutex_unlock(&kvm->slots_lock);
3530 return r;
3531}
3532
3533long kvm_arch_vm_ioctl(struct file *filp,
3534 unsigned int ioctl, unsigned long arg)
3535{
3536 struct kvm *kvm = filp->private_data;
3537 void __user *argp = (void __user *)arg;
3538 int r = -ENOTTY;
3539
3540
3541
3542
3543
3544 union {
3545 struct kvm_pit_state ps;
3546 struct kvm_pit_state2 ps2;
3547 struct kvm_pit_config pit_config;
3548 } u;
3549
3550 switch (ioctl) {
3551 case KVM_SET_TSS_ADDR:
3552 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3553 if (r < 0)
3554 goto out;
3555 break;
3556 case KVM_SET_IDENTITY_MAP_ADDR: {
3557 u64 ident_addr;
3558
3559 r = -EFAULT;
3560 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3561 goto out;
3562 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3563 if (r < 0)
3564 goto out;
3565 break;
3566 }
3567 case KVM_SET_NR_MMU_PAGES:
3568 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3569 if (r)
3570 goto out;
3571 break;
3572 case KVM_GET_NR_MMU_PAGES:
3573 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3574 break;
3575 case KVM_CREATE_IRQCHIP: {
3576 struct kvm_pic *vpic;
3577
3578 mutex_lock(&kvm->lock);
3579 r = -EEXIST;
3580 if (kvm->arch.vpic)
3581 goto create_irqchip_unlock;
3582 r = -ENOMEM;
3583 vpic = kvm_create_pic(kvm);
3584 if (vpic) {
3585 r = kvm_ioapic_init(kvm);
3586 if (r) {
3587 mutex_lock(&kvm->slots_lock);
3588 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3589 &vpic->dev_master);
3590 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3591 &vpic->dev_slave);
3592 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3593 &vpic->dev_eclr);
3594 mutex_unlock(&kvm->slots_lock);
3595 kfree(vpic);
3596 goto create_irqchip_unlock;
3597 }
3598 } else
3599 goto create_irqchip_unlock;
3600 smp_wmb();
3601 kvm->arch.vpic = vpic;
3602 smp_wmb();
3603 r = kvm_setup_default_irq_routing(kvm);
3604 if (r) {
3605 mutex_lock(&kvm->slots_lock);
3606 mutex_lock(&kvm->irq_lock);
3607 kvm_ioapic_destroy(kvm);
3608 kvm_destroy_pic(kvm);
3609 mutex_unlock(&kvm->irq_lock);
3610 mutex_unlock(&kvm->slots_lock);
3611 }
3612 create_irqchip_unlock:
3613 mutex_unlock(&kvm->lock);
3614 break;
3615 }
3616 case KVM_CREATE_PIT:
3617 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3618 goto create_pit;
3619 case KVM_CREATE_PIT2:
3620 r = -EFAULT;
3621 if (copy_from_user(&u.pit_config, argp,
3622 sizeof(struct kvm_pit_config)))
3623 goto out;
3624 create_pit:
3625 mutex_lock(&kvm->slots_lock);
3626 r = -EEXIST;
3627 if (kvm->arch.vpit)
3628 goto create_pit_unlock;
3629 r = -ENOMEM;
3630 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3631 if (kvm->arch.vpit)
3632 r = 0;
3633 create_pit_unlock:
3634 mutex_unlock(&kvm->slots_lock);
3635 break;
3636 case KVM_IRQ_LINE_STATUS:
3637 case KVM_IRQ_LINE: {
3638 struct kvm_irq_level irq_event;
3639
3640 r = -EFAULT;
3641 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3642 goto out;
3643 r = -ENXIO;
3644 if (irqchip_in_kernel(kvm)) {
3645 __s32 status;
3646 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3647 irq_event.irq, irq_event.level);
3648 if (ioctl == KVM_IRQ_LINE_STATUS) {
3649 r = -EFAULT;
3650 irq_event.status = status;
3651 if (copy_to_user(argp, &irq_event,
3652 sizeof irq_event))
3653 goto out;
3654 }
3655 r = 0;
3656 }
3657 break;
3658 }
3659 case KVM_GET_IRQCHIP: {
3660
3661 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3662
3663 r = -ENOMEM;
3664 if (!chip)
3665 goto out;
3666 r = -EFAULT;
3667 if (copy_from_user(chip, argp, sizeof *chip))
3668 goto get_irqchip_out;
3669 r = -ENXIO;
3670 if (!irqchip_in_kernel(kvm))
3671 goto get_irqchip_out;
3672 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3673 if (r)
3674 goto get_irqchip_out;
3675 r = -EFAULT;
3676 if (copy_to_user(argp, chip, sizeof *chip))
3677 goto get_irqchip_out;
3678 r = 0;
3679 get_irqchip_out:
3680 kfree(chip);
3681 if (r)
3682 goto out;
3683 break;
3684 }
3685 case KVM_SET_IRQCHIP: {
3686
3687 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3688
3689 r = -ENOMEM;
3690 if (!chip)
3691 goto out;
3692 r = -EFAULT;
3693 if (copy_from_user(chip, argp, sizeof *chip))
3694 goto set_irqchip_out;
3695 r = -ENXIO;
3696 if (!irqchip_in_kernel(kvm))
3697 goto set_irqchip_out;
3698 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3699 if (r)
3700 goto set_irqchip_out;
3701 r = 0;
3702 set_irqchip_out:
3703 kfree(chip);
3704 if (r)
3705 goto out;
3706 break;
3707 }
3708 case KVM_GET_PIT: {
3709 r = -EFAULT;
3710 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3711 goto out;
3712 r = -ENXIO;
3713 if (!kvm->arch.vpit)
3714 goto out;
3715 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3716 if (r)
3717 goto out;
3718 r = -EFAULT;
3719 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3720 goto out;
3721 r = 0;
3722 break;
3723 }
3724 case KVM_SET_PIT: {
3725 r = -EFAULT;
3726 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3727 goto out;
3728 r = -ENXIO;
3729 if (!kvm->arch.vpit)
3730 goto out;
3731 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3732 if (r)
3733 goto out;
3734 r = 0;
3735 break;
3736 }
3737 case KVM_GET_PIT2: {
3738 r = -ENXIO;
3739 if (!kvm->arch.vpit)
3740 goto out;
3741 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3742 if (r)
3743 goto out;
3744 r = -EFAULT;
3745 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3746 goto out;
3747 r = 0;
3748 break;
3749 }
3750 case KVM_SET_PIT2: {
3751 r = -EFAULT;
3752 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3753 goto out;
3754 r = -ENXIO;
3755 if (!kvm->arch.vpit)
3756 goto out;
3757 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3758 if (r)
3759 goto out;
3760 r = 0;
3761 break;
3762 }
3763 case KVM_REINJECT_CONTROL: {
3764 struct kvm_reinject_control control;
3765 r = -EFAULT;
3766 if (copy_from_user(&control, argp, sizeof(control)))
3767 goto out;
3768 r = kvm_vm_ioctl_reinject(kvm, &control);
3769 if (r)
3770 goto out;
3771 r = 0;
3772 break;
3773 }
3774 case KVM_XEN_HVM_CONFIG: {
3775 r = -EFAULT;
3776 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3777 sizeof(struct kvm_xen_hvm_config)))
3778 goto out;
3779 r = -EINVAL;
3780 if (kvm->arch.xen_hvm_config.flags)
3781 goto out;
3782 r = 0;
3783 break;
3784 }
3785 case KVM_SET_CLOCK: {
3786 struct kvm_clock_data user_ns;
3787 u64 now_ns;
3788 s64 delta;
3789
3790 r = -EFAULT;
3791 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3792 goto out;
3793
3794 r = -EINVAL;
3795 if (user_ns.flags)
3796 goto out;
3797
3798 r = 0;
3799 local_irq_disable();
3800 now_ns = get_kernel_ns();
3801 delta = user_ns.clock - now_ns;
3802 local_irq_enable();
3803 kvm->arch.kvmclock_offset = delta;
3804 break;
3805 }
3806 case KVM_GET_CLOCK: {
3807 struct kvm_clock_data user_ns;
3808 u64 now_ns;
3809
3810 local_irq_disable();
3811 now_ns = get_kernel_ns();
3812 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3813 local_irq_enable();
3814 user_ns.flags = 0;
3815 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3816
3817 r = -EFAULT;
3818 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3819 goto out;
3820 r = 0;
3821 break;
3822 }
3823
3824 default:
3825 ;
3826 }
3827out:
3828 return r;
3829}
3830
3831static void kvm_init_msr_list(void)
3832{
3833 u32 dummy[2];
3834 unsigned i, j;
3835
3836
3837 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3838 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3839 continue;
3840 if (j < i)
3841 msrs_to_save[j] = msrs_to_save[i];
3842 j++;
3843 }
3844 num_msrs_to_save = j;
3845}
3846
3847static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3848 const void *v)
3849{
3850 int handled = 0;
3851 int n;
3852
3853 do {
3854 n = min(len, 8);
3855 if (!(vcpu->arch.apic &&
3856 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3857 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3858 break;
3859 handled += n;
3860 addr += n;
3861 len -= n;
3862 v += n;
3863 } while (len);
3864
3865 return handled;
3866}
3867
3868static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3869{
3870 int handled = 0;
3871 int n;
3872
3873 do {
3874 n = min(len, 8);
3875 if (!(vcpu->arch.apic &&
3876 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3877 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3878 break;
3879 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3880 handled += n;
3881 addr += n;
3882 len -= n;
3883 v += n;
3884 } while (len);
3885
3886 return handled;
3887}
3888
3889static void kvm_set_segment(struct kvm_vcpu *vcpu,
3890 struct kvm_segment *var, int seg)
3891{
3892 kvm_x86_ops->set_segment(vcpu, var, seg);
3893}
3894
3895void kvm_get_segment(struct kvm_vcpu *vcpu,
3896 struct kvm_segment *var, int seg)
3897{
3898 kvm_x86_ops->get_segment(vcpu, var, seg);
3899}
3900
3901static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3902{
3903 return gpa;
3904}
3905
3906static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3907{
3908 gpa_t t_gpa;
3909 struct x86_exception exception;
3910
3911 BUG_ON(!mmu_is_nested(vcpu));
3912
3913
3914 access |= PFERR_USER_MASK;
3915 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3916
3917 return t_gpa;
3918}
3919
3920gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3921 struct x86_exception *exception)
3922{
3923 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3924 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3925}
3926
3927 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3928 struct x86_exception *exception)
3929{
3930 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3931 access |= PFERR_FETCH_MASK;
3932 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3933}
3934
3935gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3936 struct x86_exception *exception)
3937{
3938 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3939 access |= PFERR_WRITE_MASK;
3940 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3941}
3942
3943
3944gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3945 struct x86_exception *exception)
3946{
3947 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3948}
3949
3950static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3951 struct kvm_vcpu *vcpu, u32 access,
3952 struct x86_exception *exception)
3953{
3954 void *data = val;
3955 int r = X86EMUL_CONTINUE;
3956
3957 while (bytes) {
3958 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3959 exception);
3960 unsigned offset = addr & (PAGE_SIZE-1);
3961 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3962 int ret;
3963
3964 if (gpa == UNMAPPED_GVA)
3965 return X86EMUL_PROPAGATE_FAULT;
3966 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3967 if (ret < 0) {
3968 r = X86EMUL_IO_NEEDED;
3969 goto out;
3970 }
3971
3972 bytes -= toread;
3973 data += toread;
3974 addr += toread;
3975 }
3976out:
3977 return r;
3978}
3979
3980
3981static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3982 gva_t addr, void *val, unsigned int bytes,
3983 struct x86_exception *exception)
3984{
3985 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3986 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3987
3988 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3989 access | PFERR_FETCH_MASK,
3990 exception);
3991}
3992
3993int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3994 gva_t addr, void *val, unsigned int bytes,
3995 struct x86_exception *exception)
3996{
3997 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3998 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3999
4000 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4001 exception);
4002}
4003EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4004
4005static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4006 gva_t addr, void *val, unsigned int bytes,
4007 struct x86_exception *exception)
4008{
4009 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4010 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4011}
4012
4013int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4014 gva_t addr, void *val,
4015 unsigned int bytes,
4016 struct x86_exception *exception)
4017{
4018 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4019 void *data = val;
4020 int r = X86EMUL_CONTINUE;
4021
4022 while (bytes) {
4023 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4024 PFERR_WRITE_MASK,
4025 exception);
4026 unsigned offset = addr & (PAGE_SIZE-1);
4027 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4028 int ret;
4029
4030 if (gpa == UNMAPPED_GVA)
4031 return X86EMUL_PROPAGATE_FAULT;
4032 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
4033 if (ret < 0) {
4034 r = X86EMUL_IO_NEEDED;
4035 goto out;
4036 }
4037
4038 bytes -= towrite;
4039 data += towrite;
4040 addr += towrite;
4041 }
4042out:
4043 return r;
4044}
4045EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4046
4047static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4048 gpa_t *gpa, struct x86_exception *exception,
4049 bool write)
4050{
4051 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4052
4053 if (vcpu_match_mmio_gva(vcpu, gva) &&
4054 check_write_user_access(vcpu, write, access,
4055 vcpu->arch.access)) {
4056 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4057 (gva & (PAGE_SIZE - 1));
4058 trace_vcpu_match_mmio(gva, *gpa, write, false);
4059 return 1;
4060 }
4061
4062 if (write)
4063 access |= PFERR_WRITE_MASK;
4064
4065 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4066
4067 if (*gpa == UNMAPPED_GVA)
4068 return -1;
4069
4070
4071 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4072 return 1;
4073
4074 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4075 trace_vcpu_match_mmio(gva, *gpa, write, true);
4076 return 1;
4077 }
4078
4079 return 0;
4080}
4081
4082int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4083 const void *val, int bytes)
4084{
4085 int ret;
4086
4087 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4088 if (ret < 0)
4089 return 0;
4090 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
4091 return 1;
4092}
4093
/*
 * Callbacks that let emulator_read_write_onepage() serve both reads and
 * writes with one code path.
 */
struct read_write_emulator_ops {
	/*
	 * Optional.  Called first; a nonzero return means the access is
	 * already complete and nothing else is attempted.
	 */
	int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
				  int bytes);
	/*
	 * Access ordinary guest memory at @gpa.  A nonzero return means
	 * success; zero makes the caller fall back to the MMIO path.
	 */
	int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
				  void *val, int bytes);
	/* In-kernel MMIO attempt; returns the number of bytes handled. */
	int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
			       int bytes, void *val);
	/*
	 * Called after a KVM_EXIT_MMIO exit has been set up for the bytes
	 * that were not handled in the kernel; its return value becomes the
	 * emulator status.
	 */
	int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
				    void *val, int bytes);
	/* True for the write flavour of the operations. */
	bool write;
};
4105
4106static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4107{
4108 if (vcpu->mmio_read_completed) {
4109 memcpy(val, vcpu->mmio_data, bytes);
4110 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4111 vcpu->mmio_phys_addr, *(u64 *)val);
4112 vcpu->mmio_read_completed = 0;
4113 return 1;
4114 }
4115
4116 return 0;
4117}
4118
4119static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4120 void *val, int bytes)
4121{
4122 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4123}
4124
4125static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4126 void *val, int bytes)
4127{
4128 return emulator_write_phys(vcpu, gpa, val, bytes);
4129}
4130
4131static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4132{
4133 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4134 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4135}
4136
4137static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4138 void *val, int bytes)
4139{
4140 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4141 return X86EMUL_IO_NEEDED;
4142}
4143
4144static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4145 void *val, int bytes)
4146{
4147 memcpy(vcpu->mmio_data, val, bytes);
4148 memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
4149 return X86EMUL_CONTINUE;
4150}
4151
4152static struct read_write_emulator_ops read_emultor = {
4153 .read_write_prepare = read_prepare,
4154 .read_write_emulate = read_emulate,
4155 .read_write_mmio = vcpu_mmio_read,
4156 .read_write_exit_mmio = read_exit_mmio,
4157};
4158
4159static struct read_write_emulator_ops write_emultor = {
4160 .read_write_emulate = write_emulate,
4161 .read_write_mmio = write_mmio,
4162 .read_write_exit_mmio = write_exit_mmio,
4163 .write = true,
4164};
4165
/*
 * Emulate one guest memory access of @bytes at guest-virtual @addr using
 * the read or write flavour described by @ops.  emulator_read_write()
 * splits page-crossing accesses before calling this.
 *
 * Order of attempts:
 *   1. the optional prepare hook (a nonzero result completes the access);
 *   2. address translation via vcpu_mmio_gva_to_gpa();
 *   3. plain guest memory through ops->read_write_emulate, unless the
 *      translation already classified the address as MMIO;
 *   4. in-kernel MMIO devices through ops->read_write_mmio;
 *   5. a KVM_EXIT_MMIO exit for whatever is still unhandled.
 *
 * Returns X86EMUL_CONTINUE, X86EMUL_PROPAGATE_FAULT (translation failed,
 * details in @exception) or the value of ops->read_write_exit_mmio.
 */
static int emulator_read_write_onepage(unsigned long addr, void *val,
				       unsigned int bytes,
				       struct x86_exception *exception,
				       struct kvm_vcpu *vcpu,
				       struct read_write_emulator_ops *ops)
{
	gpa_t gpa;
	int handled, ret;
	bool write = ops->write;

	if (ops->read_write_prepare &&
		  ops->read_write_prepare(vcpu, val, bytes))
		return X86EMUL_CONTINUE;

	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);

	if (ret < 0)
		return X86EMUL_PROPAGATE_FAULT;

	/* A positive result means the address is already known to be MMIO. */
	if (ret)
		goto mmio;

	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
		return X86EMUL_CONTINUE;

mmio:
	/*
	 * Reached for known MMIO addresses and when the plain memory access
	 * above failed.  Let in-kernel devices take as much as they can.
	 */
	handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
	if (handled == bytes)
		return X86EMUL_CONTINUE;

	/* Skip the part that was handled in the kernel. */
	gpa += handled;
	bytes -= handled;
	val += handled;

	/* Describe the remainder in vcpu->run for a KVM_EXIT_MMIO exit. */
	vcpu->mmio_needed = 1;
	vcpu->run->exit_reason = KVM_EXIT_MMIO;
	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	/* run->mmio.len is capped at 8; mmio_size keeps the full remainder. */
	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
	vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
	vcpu->mmio_index = 0;

	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
4214
4215int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4216 void *val, unsigned int bytes,
4217 struct x86_exception *exception,
4218 struct read_write_emulator_ops *ops)
4219{
4220 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4221
4222
4223 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4224 int rc, now;
4225
4226 now = -addr & ~PAGE_MASK;
4227 rc = emulator_read_write_onepage(addr, val, now, exception,
4228 vcpu, ops);
4229
4230 if (rc != X86EMUL_CONTINUE)
4231 return rc;
4232 addr += now;
4233 val += now;
4234 bytes -= now;
4235 }
4236
4237 return emulator_read_write_onepage(addr, val, bytes, exception,
4238 vcpu, ops);
4239}
4240
4241static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4242 unsigned long addr,
4243 void *val,
4244 unsigned int bytes,
4245 struct x86_exception *exception)
4246{
4247 return emulator_read_write(ctxt, addr, val, bytes,
4248 exception, &read_emultor);
4249}
4250
4251int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4252 unsigned long addr,
4253 const void *val,
4254 unsigned int bytes,
4255 struct x86_exception *exception)
4256{
4257 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4258 exception, &write_emultor);
4259}
4260
/*
 * CMPXCHG_TYPE(t, ptr, old, new): run cmpxchg() on *ptr viewed as type t,
 * with the expected and replacement values loaded from *old and *new.
 * Evaluates to true when cmpxchg()'s result equals *old.  Note that @old
 * is dereferenced twice.
 */
#define CMPXCHG_TYPE(t, ptr, old, new) \
	(cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))

/*
 * 64-bit variant: with CONFIG_X86_64 the generic macro is used on u64,
 * otherwise cmpxchg64() is called explicitly.
 */
#ifdef CONFIG_X86_64
#  define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
#else
#  define CMPXCHG64(ptr, old, new) \
	(cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
#endif
4270
/*
 * Emulator callback for compare-and-exchange on guest memory.
 *
 * When possible the exchange is done with a real cmpxchg on the mapped
 * guest page.  That requires @bytes to be a power of two no larger than
 * 8, a successful translation, a target outside the
 * APIC_DEFAULT_PHYS_BASE page, an operand that does not cross a page and
 * a valid backing page.  Otherwise the operation is emulated as a plain
 * write of @new (with a one-time warning), ignoring @old.
 *
 * Returns X86EMUL_CONTINUE when the exchange happened,
 * X86EMUL_CMPXCHG_FAILED when memory did not contain *@old, or the status
 * of emulator_write_emulated() on the fallback path.
 */
static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
				     unsigned long addr,
				     const void *old,
				     const void *new,
				     unsigned int bytes,
				     struct x86_exception *exception)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	gpa_t gpa;
	struct page *page;
	char *kaddr;
	bool exchanged;

	/* Only sizes 1, 2, 4 and 8 can use the cmpxchg path below. */
	if (bytes > 8 || (bytes & (bytes - 1)))
		goto emul_write;

	/* Fault details are not collected here (NULL exception). */
	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);

	if (gpa == UNMAPPED_GVA ||
	    (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
		goto emul_write;

	/* The operand must sit entirely inside one page. */
	if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
		goto emul_write;

	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
	if (is_error_page(page)) {
		kvm_release_page_clean(page);
		goto emul_write;
	}

	kaddr = kmap_atomic(page, KM_USER0);
	kaddr += offset_in_page(gpa);
	switch (bytes) {
	case 1:
		exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
		break;
	case 2:
		exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
		break;
	case 4:
		exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
		break;
	case 8:
		exchanged = CMPXCHG64(kaddr, old, new);
		break;
	default:
		/* Unreachable: other sizes were filtered out above. */
		BUG();
	}
	kunmap_atomic(kaddr, KM_USER0);
	/* The page is released as dirty whether or not the exchange hit. */
	kvm_release_page_dirty(page);

	if (!exchanged)
		return X86EMUL_CMPXCHG_FAILED;

	/* Tell the MMU about the new contents, as for an ordinary write. */
	kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);

	return X86EMUL_CONTINUE;

emul_write:
	printk_once(KERN_WARNING "kvm: emulating exchange as write\n");

	return emulator_write_emulated(ctxt, addr, new, bytes, exception);
}
4336
4337static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4338{
4339
4340 int r;
4341
4342 if (vcpu->arch.pio.in)
4343 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4344 vcpu->arch.pio.size, pd);
4345 else
4346 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4347 vcpu->arch.pio.port, vcpu->arch.pio.size,
4348 pd);
4349 return r;
4350}
4351
4352
4353static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4354 int size, unsigned short port, void *val,
4355 unsigned int count)
4356{
4357 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4358
4359 if (vcpu->arch.pio.count)
4360 goto data_avail;
4361
4362 trace_kvm_pio(0, port, size, count);
4363
4364 vcpu->arch.pio.port = port;
4365 vcpu->arch.pio.in = 1;
4366 vcpu->arch.pio.count = count;
4367 vcpu->arch.pio.size = size;
4368
4369 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4370 data_avail:
4371 memcpy(val, vcpu->arch.pio_data, size * count);
4372 vcpu->arch.pio.count = 0;
4373 return 1;
4374 }
4375
4376 vcpu->run->exit_reason = KVM_EXIT_IO;
4377 vcpu->run->io.direction = KVM_EXIT_IO_IN;
4378 vcpu->run->io.size = size;
4379 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4380 vcpu->run->io.count = count;
4381 vcpu->run->io.port = port;
4382
4383 return 0;
4384}
4385
4386static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4387 int size, unsigned short port,
4388 const void *val, unsigned int count)
4389{
4390 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4391
4392 trace_kvm_pio(1, port, size, count);
4393
4394 vcpu->arch.pio.port = port;
4395 vcpu->arch.pio.in = 0;
4396 vcpu->arch.pio.count = count;
4397 vcpu->arch.pio.size = size;
4398
4399 memcpy(vcpu->arch.pio_data, val, size * count);
4400
4401 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4402 vcpu->arch.pio.count = 0;
4403 return 1;
4404 }
4405
4406 vcpu->run->exit_reason = KVM_EXIT_IO;
4407 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
4408 vcpu->run->io.size = size;
4409 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4410 vcpu->run->io.count = count;
4411 vcpu->run->io.port = port;
4412
4413 return 0;
4414}
4415
4416static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4417{
4418 return kvm_x86_ops->get_segment_base(vcpu, seg);
4419}
4420
4421static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4422{
4423 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4424}
4425
4426int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4427{
4428 if (!need_emulate_wbinvd(vcpu))
4429 return X86EMUL_CONTINUE;
4430
4431 if (kvm_x86_ops->has_wbinvd_exit()) {
4432 int cpu = get_cpu();
4433
4434 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4435 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4436 wbinvd_ipi, NULL, 1);
4437 put_cpu();
4438 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4439 } else
4440 wbinvd();
4441 return X86EMUL_CONTINUE;
4442}
4443EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4444
/*
 * Emulator callback for WBINVD; the status of kvm_emulate_wbinvd() is
 * intentionally not propagated.
 */
static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	kvm_emulate_wbinvd(vcpu);
}
4449
/*
 * Emulator callback: read debug register @dr into *@dest via
 * _kvm_get_dr() and return its result.
 */
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	return _kvm_get_dr(vcpu, dr, dest);
}
4454
/*
 * Emulator callback: write @value to debug register @dr via
 * __kvm_set_dr() and return its result.
 */
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	return __kvm_set_dr(vcpu, dr, value);
}
4460
/*
 * Build a 64-bit control register value that keeps the upper 32 bits of
 * @curr_cr and takes its lower 32 bits from @new_val.
 */
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	const u64 low32_mask = 0xffffffffULL;
	u64 upper = curr_cr & ~low32_mask;

	return upper | new_val;
}
4465
4466static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4467{
4468 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4469 unsigned long value;
4470
4471 switch (cr) {
4472 case 0:
4473 value = kvm_read_cr0(vcpu);
4474 break;
4475 case 2:
4476 value = vcpu->arch.cr2;
4477 break;
4478 case 3:
4479 value = kvm_read_cr3(vcpu);
4480 break;
4481 case 4:
4482 value = kvm_read_cr4(vcpu);
4483 break;
4484 case 8:
4485 value = kvm_get_cr8(vcpu);
4486 break;
4487 default:
4488 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4489 return 0;
4490 }
4491
4492 return value;
4493}
4494
4495static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4496{
4497 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4498 int res = 0;
4499
4500 switch (cr) {
4501 case 0:
4502 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4503 break;
4504 case 2:
4505 vcpu->arch.cr2 = val;
4506 break;
4507 case 3:
4508 res = kvm_set_cr3(vcpu, val);
4509 break;
4510 case 4:
4511 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4512 break;
4513 case 8:
4514 res = kvm_set_cr8(vcpu, val);
4515 break;
4516 default:
4517 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4518 res = -1;
4519 }
4520
4521 return res;
4522}
4523
4524static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4525{
4526 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4527}
4528
4529static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4530{
4531 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4532}
4533
4534static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4535{
4536 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4537}
4538
4539static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4540{
4541 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4542}
4543
4544static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4545{
4546 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4547}
4548
/* Emulator callback: return the base of segment @seg. */
static unsigned long emulator_get_cached_segment_base(
	struct x86_emulate_ctxt *ctxt, int seg)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	return get_segment_base(vcpu, seg);
}
4554
4555static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4556 struct desc_struct *desc, u32 *base3,
4557 int seg)
4558{
4559 struct kvm_segment var;
4560
4561 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4562 *selector = var.selector;
4563
4564 if (var.unusable)
4565 return false;
4566
4567 if (var.g)
4568 var.limit >>= 12;
4569 set_desc_limit(desc, var.limit);
4570 set_desc_base(desc, (unsigned long)var.base);
4571#ifdef CONFIG_X86_64
4572 if (base3)
4573 *base3 = var.base >> 32;
4574#endif
4575 desc->type = var.type;
4576 desc->s = var.s;
4577 desc->dpl = var.dpl;
4578 desc->p = var.present;
4579 desc->avl = var.avl;
4580 desc->l = var.l;
4581 desc->d = var.db;
4582 desc->g = var.g;
4583
4584 return true;
4585}
4586
4587static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4588 struct desc_struct *desc, u32 base3,
4589 int seg)
4590{
4591 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4592 struct kvm_segment var;
4593
4594 var.selector = selector;
4595 var.base = get_desc_base(desc);
4596#ifdef CONFIG_X86_64
4597 var.base |= ((u64)base3) << 32;
4598#endif
4599 var.limit = get_desc_limit(desc);
4600 if (desc->g)
4601 var.limit = (var.limit << 12) | 0xfff;
4602 var.type = desc->type;
4603 var.present = desc->p;
4604 var.dpl = desc->dpl;
4605 var.db = desc->d;
4606 var.s = desc->s;
4607 var.l = desc->l;
4608 var.g = desc->g;
4609 var.avl = desc->avl;
4610 var.present = desc->p;
4611 var.unusable = !var.present;
4612 var.padding = 0;
4613
4614 kvm_set_segment(vcpu, &var, seg);
4615 return;
4616}
4617
4618static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4619 u32 msr_index, u64 *pdata)
4620{
4621 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4622}
4623
4624static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4625 u32 msr_index, u64 data)
4626{
4627 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
4628}
4629
4630static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4631{
4632 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4633}
4634
/*
 * Emulator callback: make the guest FPU state usable by the emulator.
 * Preemption is disabled here and stays disabled until
 * emulator_put_fpu() is called.
 */
static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	preempt_disable();
	kvm_load_guest_fpu(vcpu);

	/*
	 * Clear CR0.TS explicitly after loading the guest FPU state.
	 * NOTE(review): presumably TS can still be set at this point and
	 * would make FPU instructions trap - confirm against
	 * kvm_load_guest_fpu().
	 */
	clts();
}
4645
/*
 * Emulator callback: counterpart of emulator_get_fpu().  Re-enables the
 * preemption that was disabled there; @ctxt is not used.
 */
static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
{
	(void)ctxt;

	preempt_enable();
}
4650
4651static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4652 struct x86_instruction_info *info,
4653 enum x86_intercept_stage stage)
4654{
4655 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4656}
4657
4658static struct x86_emulate_ops emulate_ops = {
4659 .read_std = kvm_read_guest_virt_system,
4660 .write_std = kvm_write_guest_virt_system,
4661 .fetch = kvm_fetch_guest_virt,
4662 .read_emulated = emulator_read_emulated,
4663 .write_emulated = emulator_write_emulated,
4664 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4665 .invlpg = emulator_invlpg,
4666 .pio_in_emulated = emulator_pio_in_emulated,
4667 .pio_out_emulated = emulator_pio_out_emulated,
4668 .get_segment = emulator_get_segment,
4669 .set_segment = emulator_set_segment,
4670 .get_cached_segment_base = emulator_get_cached_segment_base,
4671 .get_gdt = emulator_get_gdt,
4672 .get_idt = emulator_get_idt,
4673 .set_gdt = emulator_set_gdt,
4674 .set_idt = emulator_set_idt,
4675 .get_cr = emulator_get_cr,
4676 .set_cr = emulator_set_cr,
4677 .cpl = emulator_get_cpl,
4678 .get_dr = emulator_get_dr,
4679 .set_dr = emulator_set_dr,
4680 .set_msr = emulator_set_msr,
4681 .get_msr = emulator_get_msr,
4682 .halt = emulator_halt,
4683 .wbinvd = emulator_wbinvd,
4684 .fix_hypercall = emulator_fix_hypercall,
4685 .get_fpu = emulator_get_fpu,
4686 .put_fpu = emulator_put_fpu,
4687 .intercept = emulator_intercept,
4688};
4689
4690static void cache_all_regs(struct kvm_vcpu *vcpu)
4691{
4692 kvm_register_read(vcpu, VCPU_REGS_RAX);
4693 kvm_register_read(vcpu, VCPU_REGS_RSP);
4694 kvm_register_read(vcpu, VCPU_REGS_RIP);
4695 vcpu->arch.regs_dirty = ~0;
4696}
4697
4698static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4699{
4700 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4701
4702
4703
4704
4705
4706
4707
4708 if (!(int_shadow & mask))
4709 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4710}
4711
4712static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4713{
4714 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4715 if (ctxt->exception.vector == PF_VECTOR)
4716 kvm_propagate_fault(vcpu, &ctxt->exception);
4717 else if (ctxt->exception.error_code_valid)
4718 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4719 ctxt->exception.error_code);
4720 else
4721 kvm_queue_exception(vcpu, ctxt->exception.vector);
4722}
4723
4724static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
4725 const unsigned long *regs)
4726{
4727 memset(&ctxt->twobyte, 0,
4728 (void *)&ctxt->regs - (void *)&ctxt->twobyte);
4729 memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
4730
4731 ctxt->fetch.start = 0;
4732 ctxt->fetch.end = 0;
4733 ctxt->io_read.pos = 0;
4734 ctxt->io_read.end = 0;
4735 ctxt->mem_read.pos = 0;
4736 ctxt->mem_read.end = 0;
4737}
4738
4739static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4740{
4741 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4742 int cs_db, cs_l;
4743
4744
4745
4746
4747
4748
4749
4750 cache_all_regs(vcpu);
4751
4752 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4753
4754 ctxt->eflags = kvm_get_rflags(vcpu);
4755 ctxt->eip = kvm_rip_read(vcpu);
4756 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4757 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4758 cs_l ? X86EMUL_MODE_PROT64 :
4759 cs_db ? X86EMUL_MODE_PROT32 :
4760 X86EMUL_MODE_PROT16;
4761 ctxt->guest_mode = is_guest_mode(vcpu);
4762
4763 init_decode_cache(ctxt, vcpu->arch.regs);
4764 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4765}
4766
4767int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4768{
4769 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4770 int ret;
4771
4772 init_emulate_ctxt(vcpu);
4773
4774 ctxt->op_bytes = 2;
4775 ctxt->ad_bytes = 2;
4776 ctxt->_eip = ctxt->eip + inc_eip;
4777 ret = emulate_int_real(ctxt, irq);
4778
4779 if (ret != X86EMUL_CONTINUE)
4780 return EMULATE_FAIL;
4781
4782 ctxt->eip = ctxt->_eip;
4783 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
4784 kvm_rip_write(vcpu, ctxt->eip);
4785 kvm_set_rflags(vcpu, ctxt->eflags);
4786
4787 if (irq == NMI_VECTOR)
4788 vcpu->arch.nmi_pending = 0;
4789 else
4790 vcpu->arch.interrupt.pending = false;
4791
4792 return EMULATE_DONE;
4793}
4794EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4795
4796static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4797{
4798 int r = EMULATE_DONE;
4799
4800 ++vcpu->stat.insn_emulation_fail;
4801 trace_kvm_emulate_insn_failed(vcpu);
4802 if (!is_guest_mode(vcpu)) {
4803 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4804 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4805 vcpu->run->internal.ndata = 0;
4806 r = EMULATE_FAIL;
4807 }
4808 kvm_queue_exception(vcpu, UD_VECTOR);
4809
4810 return r;
4811}
4812
4813static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4814{
4815 gpa_t gpa;
4816
4817 if (tdp_enabled)
4818 return false;
4819
4820
4821
4822
4823
4824
4825 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4826 return true;
4827
4828 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4829
4830 if (gpa == UNMAPPED_GVA)
4831 return true;
4832
4833 if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
4834 return true;
4835
4836 return false;
4837}
4838
/*
 * Decode (unless EMULTYPE_NO_DECODE is given) and emulate one guest
 * instruction for @vcpu.
 *
 * @cr2:            address handed to reexecute_instruction() when
 *                  decoding or emulation fails
 * @emulation_type: EMULTYPE_* flags
 * @insn, @insn_len: passed through to x86_decode_insn()
 *
 * Returns EMULATE_DONE, EMULATE_DO_MMIO (pending port I/O or MMIO must
 * be completed first) or EMULATE_FAIL.
 */
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
			    unsigned long cr2,
			    int emulation_type,
			    void *insn,
			    int insn_len)
{
	int r;
	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
	bool writeback = true;

	kvm_clear_exception_queue(vcpu);

	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
		init_emulate_ctxt(vcpu);
		ctxt->interruptibility = 0;
		ctxt->have_exception = false;
		ctxt->perm_ok = false;

		ctxt->only_vendor_specific_insn
			= emulation_type & EMULTYPE_TRAP_UD;

		r = x86_decode_insn(ctxt, insn, insn_len);

		trace_kvm_emulate_insn_start(vcpu);
		++vcpu->stat.insn_emulation;
		if (r != EMULATION_OK)  {
			/*
			 * Decode failed.  TRAP_UD and SKIP callers only get
			 * EMULATE_FAIL; the full failure handling is reserved
			 * for the remaining cases, and only after
			 * re-execution has been ruled out.
			 */
			if (emulation_type & EMULTYPE_TRAP_UD)
				return EMULATE_FAIL;
			if (reexecute_instruction(vcpu, cr2))
				return EMULATE_DONE;
			if (emulation_type & EMULTYPE_SKIP)
				return EMULATE_FAIL;
			return handle_emulation_failure(vcpu);
		}
	}

	/* SKIP: just advance RIP past the decoded instruction. */
	if (emulation_type & EMULTYPE_SKIP) {
		kvm_rip_write(vcpu, ctxt->_eip);
		return EMULATE_DONE;
	}

	/* Refresh the emulator's register copy if it was flagged stale. */
	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
	}

restart:
	r = x86_emulate_insn(ctxt);

	if (r == EMULATION_INTERCEPTED)
		return EMULATE_DONE;

	if (r == EMULATION_FAILED) {
		if (reexecute_instruction(vcpu, cr2))
			return EMULATE_DONE;

		return handle_emulation_failure(vcpu);
	}

	/*
	 * Classify the outcome.  The order matters: a pending exception
	 * wins over outstanding I/O, which wins over a restart request.
	 */
	if (ctxt->have_exception) {
		inject_emulated_exception(vcpu);
		r = EMULATE_DONE;
	} else if (vcpu->arch.pio.count) {
		/*
		 * Port I/O is outstanding.  For output the count is dropped
		 * and state is written back now; for input write-back is
		 * postponed.
		 */
		if (!vcpu->arch.pio.in)
			vcpu->arch.pio.count = 0;
		else
			writeback = false;
		r = EMULATE_DO_MMIO;
	} else if (vcpu->mmio_needed) {
		/* Same rule for MMIO: only reads postpone write-back. */
		if (!vcpu->mmio_is_write)
			writeback = false;
		r = EMULATE_DO_MMIO;
	} else if (r == EMULATION_RESTART)
		goto restart;
	else
		r = EMULATE_DONE;

	if (writeback) {
		/* Commit interrupt shadow, flags, registers and RIP. */
		toggle_interruptibility(vcpu, ctxt->interruptibility);
		kvm_set_rflags(vcpu, ctxt->eflags);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
		kvm_rip_write(vcpu, ctxt->eip);
	} else
		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;

	return r;
}
EXPORT_SYMBOL_GPL(x86_emulate_instruction);
4931
4932int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
4933{
4934 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4935 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
4936 size, port, &val, 1);
4937
4938 vcpu->arch.pio.count = 0;
4939 return ret;
4940}
4941EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4942
4943static void tsc_bad(void *info)
4944{
4945 __this_cpu_write(cpu_tsc_khz, 0);
4946}
4947
4948static void tsc_khz_changed(void *data)
4949{
4950 struct cpufreq_freqs *freq = data;
4951 unsigned long khz = 0;
4952
4953 if (data)
4954 khz = freq->new;
4955 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4956 khz = cpufreq_quick_get(raw_smp_processor_id());
4957 if (!khz)
4958 khz = tsc_khz;
4959 __this_cpu_write(cpu_tsc_khz, khz);
4960}
4961
/*
 * cpufreq transition notifier.  Refreshes the affected CPU's cpu_tsc_khz
 * and asks every vcpu last seen on that CPU to update its clock.
 *
 * @val:  CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE
 * @data: struct cpufreq_freqs describing the transition
 *
 * Always returns 0.
 */
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				     void *data)
{
	struct cpufreq_freqs *freq = data;
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i, send_ipi = 0;

	/*
	 * Act on only one of the two notifications per transition:
	 * PRECHANGE is ignored when the frequency goes down and POSTCHANGE
	 * is ignored when it goes up.  The update therefore runs before an
	 * increase takes effect and after a decrease has taken effect.
	 * NOTE(review): presumably so the guest is never given a frequency
	 * lower than the one actually in effect - confirm.
	 */
	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
		return 0;
	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
		return 0;

	/* Update cpu_tsc_khz on the CPU whose frequency changes. */
	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);

	raw_spin_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (vcpu->cpu != freq->cpu)
				continue;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
			/* Remember if an affected vcpu is on another CPU. */
			if (vcpu->cpu != smp_processor_id())
				send_ipi = 1;
		}
	}
	raw_spin_unlock(&kvm_lock);

	if (freq->old < freq->new && send_ipi) {
		/*
		 * The frequency is increasing and at least one affected
		 * vcpu sits on a CPU other than this one: run the
		 * cross-call to freq->cpu a second time.
		 * NOTE(review): presumably the extra IPI is there to force
		 * a running guest out so the clock-update request is seen
		 * before it runs at the new frequency - confirm.
		 */
		smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
	}
	return 0;
}
5045
5046static struct notifier_block kvmclock_cpufreq_notifier_block = {
5047 .notifier_call = kvmclock_cpufreq_notifier
5048};
5049
5050static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5051 unsigned long action, void *hcpu)
5052{
5053 unsigned int cpu = (unsigned long)hcpu;
5054
5055 switch (action) {
5056 case CPU_ONLINE:
5057 case CPU_DOWN_FAILED:
5058 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5059 break;
5060 case CPU_DOWN_PREPARE:
5061 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5062 break;
5063 }
5064 return NOTIFY_OK;
5065}
5066
5067static struct notifier_block kvmclock_cpu_notifier_block = {
5068 .notifier_call = kvmclock_cpu_notifier,
5069 .priority = -INT_MAX
5070};
5071
5072static void kvm_timer_init(void)
5073{
5074 int cpu;
5075
5076 max_tsc_khz = tsc_khz;
5077 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5078 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5079#ifdef CONFIG_CPU_FREQ
5080 struct cpufreq_policy policy;
5081 memset(&policy, 0, sizeof(policy));
5082 cpu = get_cpu();
5083 cpufreq_get_policy(&policy, cpu);
5084 if (policy.cpuinfo.max_freq)
5085 max_tsc_khz = policy.cpuinfo.max_freq;
5086 put_cpu();
5087#endif
5088 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5089 CPUFREQ_TRANSITION_NOTIFIER);
5090 }
5091 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5092 for_each_online_cpu(cpu)
5093 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5094}
5095
/*
 * Per-cpu vcpu pointer: written by kvm_before_handle_nmi() and reset to
 * NULL by kvm_after_handle_nmi(); read by the perf guest-info callbacks
 * (kvm_is_in_guest, kvm_is_user_mode, kvm_get_guest_ip).
 */
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5097
5098static int kvm_is_in_guest(void)
5099{
5100 return percpu_read(current_vcpu) != NULL;
5101}
5102
5103static int kvm_is_user_mode(void)
5104{
5105 int user_mode = 3;
5106
5107 if (percpu_read(current_vcpu))
5108 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
5109
5110 return user_mode != 0;
5111}
5112
5113static unsigned long kvm_get_guest_ip(void)
5114{
5115 unsigned long ip = 0;
5116
5117 if (percpu_read(current_vcpu))
5118 ip = kvm_rip_read(percpu_read(current_vcpu));
5119
5120 return ip;
5121}
5122
5123static struct perf_guest_info_callbacks kvm_guest_cbs = {
5124 .is_in_guest = kvm_is_in_guest,
5125 .is_user_mode = kvm_is_user_mode,
5126 .get_guest_ip = kvm_get_guest_ip,
5127};
5128
5129void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5130{
5131 percpu_write(current_vcpu, vcpu);
5132}
5133EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5134
5135void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5136{
5137 percpu_write(current_vcpu, NULL);
5138}
5139EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5140
5141static void kvm_set_mmio_spte_mask(void)
5142{
5143 u64 mask;
5144 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5145
5146
5147
5148
5149
5150 mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
5151 mask |= 1ull;
5152
5153#ifdef CONFIG_X86_64
5154
5155
5156
5157
5158 if (maxphyaddr == 52)
5159 mask &= ~1ull;
5160#endif
5161
5162 kvm_mmu_set_mmio_spte_mask(mask);
5163}
5164
5165int kvm_arch_init(void *opaque)
5166{
5167 int r;
5168 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
5169
5170 if (kvm_x86_ops) {
5171 printk(KERN_ERR "kvm: already loaded the other module\n");
5172 r = -EEXIST;
5173 goto out;
5174 }
5175
5176 if (!ops->cpu_has_kvm_support()) {
5177 printk(KERN_ERR "kvm: no hardware support\n");
5178 r = -EOPNOTSUPP;
5179 goto out;
5180 }
5181 if (ops->disabled_by_bios()) {
5182 printk(KERN_ERR "kvm: disabled by bios\n");
5183 r = -EOPNOTSUPP;
5184 goto out;
5185 }
5186
5187 r = kvm_mmu_module_init();
5188 if (r)
5189 goto out;
5190
5191 kvm_set_mmio_spte_mask();
5192 kvm_init_msr_list();
5193
5194 kvm_x86_ops = ops;
5195 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5196 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5197
5198 kvm_timer_init();
5199
5200 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5201
5202 if (cpu_has_xsave)
5203 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5204
5205 return 0;
5206
5207out:
5208 return r;
5209}
5210
5211void kvm_arch_exit(void)
5212{
5213 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5214
5215 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5216 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5217 CPUFREQ_TRANSITION_NOTIFIER);
5218 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5219 kvm_x86_ops = NULL;
5220 kvm_mmu_module_exit();
5221}
5222
5223int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5224{
5225 ++vcpu->stat.halt_exits;
5226 if (irqchip_in_kernel(vcpu->kvm)) {
5227 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5228 return 1;
5229 } else {
5230 vcpu->run->exit_reason = KVM_EXIT_HLT;
5231 return 0;
5232 }
5233}
5234EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5235
5236static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
5237 unsigned long a1)
5238{
5239 if (is_long_mode(vcpu))
5240 return a0;
5241 else
5242 return a0 | ((gpa_t)a1 << 32);
5243}
5244
5245int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5246{
5247 u64 param, ingpa, outgpa, ret;
5248 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5249 bool fast, longmode;
5250 int cs_db, cs_l;
5251
5252
5253
5254
5255
5256 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5257 kvm_queue_exception(vcpu, UD_VECTOR);
5258 return 0;
5259 }
5260
5261 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5262 longmode = is_long_mode(vcpu) && cs_l == 1;
5263
5264 if (!longmode) {
5265 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5266 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5267 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5268 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5269 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5270 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5271 }
5272#ifdef CONFIG_X86_64
5273 else {
5274 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5275 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5276 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5277 }
5278#endif
5279
5280 code = param & 0xffff;
5281 fast = (param >> 16) & 0x1;
5282 rep_cnt = (param >> 32) & 0xfff;
5283 rep_idx = (param >> 48) & 0xfff;
5284
5285 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5286
5287 switch (code) {
5288 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5289 kvm_vcpu_on_spin(vcpu);
5290 break;
5291 default:
5292 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5293 break;
5294 }
5295
5296 ret = res | (((u64)rep_done & 0xfff) << 32);
5297 if (longmode) {
5298 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5299 } else {
5300 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5301 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5302 }
5303
5304 return 1;
5305}
5306
5307int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5308{
5309 unsigned long nr, a0, a1, a2, a3, ret;
5310 int r = 1;
5311
5312 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5313 return kvm_hv_hypercall(vcpu);
5314
5315 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5316 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5317 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5318 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5319 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5320
5321 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5322
5323 if (!is_long_mode(vcpu)) {
5324 nr &= 0xFFFFFFFF;
5325 a0 &= 0xFFFFFFFF;
5326 a1 &= 0xFFFFFFFF;
5327 a2 &= 0xFFFFFFFF;
5328 a3 &= 0xFFFFFFFF;
5329 }
5330
5331 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5332 ret = -KVM_EPERM;
5333 goto out;
5334 }
5335
5336 switch (nr) {
5337 case KVM_HC_VAPIC_POLL_IRQ:
5338 ret = 0;
5339 break;
5340 case KVM_HC_MMU_OP:
5341 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
5342 break;
5343 default:
5344 ret = -KVM_ENOSYS;
5345 break;
5346 }
5347out:
5348 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5349 ++vcpu->stat.hypercalls;
5350 return r;
5351}
5352EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5353
5354int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5355{
5356 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5357 char instruction[3];
5358 unsigned long rip = kvm_rip_read(vcpu);
5359
5360
5361
5362
5363
5364
5365 kvm_mmu_zap_all(vcpu->kvm);
5366
5367 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5368
5369 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5370}
5371
5372static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
5373{
5374 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
5375 int j, nent = vcpu->arch.cpuid_nent;
5376
5377 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
5378
5379 for (j = i + 1; ; j = (j + 1) % nent) {
5380 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
5381 if (ej->function == e->function) {
5382 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
5383 return j;
5384 }
5385 }
5386 return 0;
5387}
5388
5389
5390
5391static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
5392 u32 function, u32 index)
5393{
5394 if (e->function != function)
5395 return 0;
5396 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
5397 return 0;
5398 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
5399 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
5400 return 0;
5401 return 1;
5402}
5403
5404struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
5405 u32 function, u32 index)
5406{
5407 int i;
5408 struct kvm_cpuid_entry2 *best = NULL;
5409
5410 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
5411 struct kvm_cpuid_entry2 *e;
5412
5413 e = &vcpu->arch.cpuid_entries[i];
5414 if (is_matching_cpuid_entry(e, function, index)) {
5415 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
5416 move_to_next_stateful_cpuid_entry(vcpu, i);
5417 best = e;
5418 break;
5419 }
5420 }
5421 return best;
5422}
5423EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
5424
5425int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
5426{
5427 struct kvm_cpuid_entry2 *best;
5428
5429 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
5430 if (!best || best->eax < 0x80000008)
5431 goto not_found;
5432 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
5433 if (best)
5434 return best->eax & 0xff;
5435not_found:
5436 return 36;
5437}
5438
5439
5440
5441
5442
5443
5444static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
5445 u32 function, u32 index)
5446{
5447 struct kvm_cpuid_entry2 *maxlevel;
5448
5449 maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
5450 if (!maxlevel || maxlevel->eax >= function)
5451 return NULL;
5452 if (function & 0x80000000) {
5453 maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
5454 if (!maxlevel)
5455 return NULL;
5456 }
5457 return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
5458}
5459
5460void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
5461{
5462 u32 function, index;
5463 struct kvm_cpuid_entry2 *best;
5464
5465 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
5466 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
5467 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
5468 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
5469 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
5470 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
5471 best = kvm_find_cpuid_entry(vcpu, function, index);
5472
5473 if (!best)
5474 best = check_cpuid_limit(vcpu, function, index);
5475
5476 if (best) {
5477 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
5478 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
5479 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
5480 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
5481 }
5482 kvm_x86_ops->skip_emulated_instruction(vcpu);
5483 trace_kvm_cpuid(function,
5484 kvm_register_read(vcpu, VCPU_REGS_RAX),
5485 kvm_register_read(vcpu, VCPU_REGS_RBX),
5486 kvm_register_read(vcpu, VCPU_REGS_RCX),
5487 kvm_register_read(vcpu, VCPU_REGS_RDX));
5488}
5489EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
5490
5491
5492
5493
5494
5495
5496
5497static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5498{
5499 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5500 vcpu->run->request_interrupt_window &&
5501 kvm_arch_interrupt_allowed(vcpu));
5502}
5503
5504static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5505{
5506 struct kvm_run *kvm_run = vcpu->run;
5507
5508 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5509 kvm_run->cr8 = kvm_get_cr8(vcpu);
5510 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5511 if (irqchip_in_kernel(vcpu->kvm))
5512 kvm_run->ready_for_interrupt_injection = 1;
5513 else
5514 kvm_run->ready_for_interrupt_injection =
5515 kvm_arch_interrupt_allowed(vcpu) &&
5516 !kvm_cpu_has_interrupt(vcpu) &&
5517 !kvm_event_needs_reinjection(vcpu);
5518}
5519
5520static void vapic_enter(struct kvm_vcpu *vcpu)
5521{
5522 struct kvm_lapic *apic = vcpu->arch.apic;
5523 struct page *page;
5524
5525 if (!apic || !apic->vapic_addr)
5526 return;
5527
5528 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5529
5530 vcpu->arch.apic->vapic_page = page;
5531}
5532
5533static void vapic_exit(struct kvm_vcpu *vcpu)
5534{
5535 struct kvm_lapic *apic = vcpu->arch.apic;
5536 int idx;
5537
5538 if (!apic || !apic->vapic_addr)
5539 return;
5540
5541 idx = srcu_read_lock(&vcpu->kvm->srcu);
5542 kvm_release_page_dirty(apic->vapic_page);
5543 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5544 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5545}
5546
5547static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5548{
5549 int max_irr, tpr;
5550
5551 if (!kvm_x86_ops->update_cr8_intercept)
5552 return;
5553
5554 if (!vcpu->arch.apic)
5555 return;
5556
5557 if (!vcpu->arch.apic->vapic_addr)
5558 max_irr = kvm_lapic_find_highest_irr(vcpu);
5559 else
5560 max_irr = -1;
5561
5562 if (max_irr != -1)
5563 max_irr >>= 4;
5564
5565 tpr = kvm_lapic_get_cr8(vcpu);
5566
5567 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5568}
5569
5570static void inject_pending_event(struct kvm_vcpu *vcpu)
5571{
5572
5573 if (vcpu->arch.exception.pending) {
5574 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5575 vcpu->arch.exception.has_error_code,
5576 vcpu->arch.exception.error_code);
5577 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5578 vcpu->arch.exception.has_error_code,
5579 vcpu->arch.exception.error_code,
5580 vcpu->arch.exception.reinject);
5581 return;
5582 }
5583
5584 if (vcpu->arch.nmi_injected) {
5585 kvm_x86_ops->set_nmi(vcpu);
5586 return;
5587 }
5588
5589 if (vcpu->arch.interrupt.pending) {
5590 kvm_x86_ops->set_irq(vcpu);
5591 return;
5592 }
5593
5594
5595 if (vcpu->arch.nmi_pending) {
5596 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5597 --vcpu->arch.nmi_pending;
5598 vcpu->arch.nmi_injected = true;
5599 kvm_x86_ops->set_nmi(vcpu);
5600 }
5601 } else if (kvm_cpu_has_interrupt(vcpu)) {
5602 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5603 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5604 false);
5605 kvm_x86_ops->set_irq(vcpu);
5606 }
5607 }
5608}
5609
5610static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
5611{
5612 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
5613 !vcpu->guest_xcr0_loaded) {
5614
5615 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
5616 vcpu->guest_xcr0_loaded = 1;
5617 }
5618}
5619
5620static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
5621{
5622 if (vcpu->guest_xcr0_loaded) {
5623 if (vcpu->arch.xcr0 != host_xcr0)
5624 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
5625 vcpu->guest_xcr0_loaded = 0;
5626 }
5627}
5628
5629static void process_nmi(struct kvm_vcpu *vcpu)
5630{
5631 unsigned limit = 2;
5632
5633
5634
5635
5636
5637
5638 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5639 limit = 1;
5640
5641 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5642 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5643 kvm_make_request(KVM_REQ_EVENT, vcpu);
5644}
5645
/*
 * Run the guest once: service outstanding vcpu requests, inject pending
 * events, switch into guest mode through the vendor run() hook, and
 * handle the resulting exit.
 *
 * The caller is expected to hold the kvm SRCU read lock with its index in
 * vcpu->srcu_idx; the lock is dropped while the guest runs and re-taken
 * afterwards.
 *
 * Returns the value of the vendor handle_exit() hook on a normal exit.
 * Returns 1 when entry was aborted or the vcpu was put into APF halt, 0
 * when a request demands a return to userspace (exit_reason is set), or
 * the error from kvm_guest_time_update() / kvm_mmu_reload().
 */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
	int r;
	/* Userspace asked for an interrupt window and owns the irqchip. */
	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
		vcpu->run->request_interrupt_window;

	if (vcpu->requests) {
		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
			kvm_mmu_unload(vcpu);
		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
			__kvm_migrate_timers(vcpu);
		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
			r = kvm_guest_time_update(vcpu);
			if (unlikely(r))
				goto out;
		}
		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
			kvm_mmu_sync_roots(vcpu);
		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
			kvm_x86_ops->tlb_flush(vcpu);
		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
			r = 0;
			goto out;
		}
		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
			r = 0;
			goto out;
		}
		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
			vcpu->fpu_active = 0;
			kvm_x86_ops->fpu_deactivate(vcpu);
		}
		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Mark the vcpu halted for async PF; r = 1 keeps the caller looping. */
			vcpu->arch.apf.halted = true;
			r = 1;
			goto out;
		}
		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			record_steal_time(vcpu);
		if (kvm_check_request(KVM_REQ_NMI, vcpu))
			process_nmi(vcpu);

	}

	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		goto out;

	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
		inject_pending_event(vcpu);

		/* Ask for an NMI or IRQ window exit if something is still waiting. */
		if (vcpu->arch.nmi_pending)
			kvm_x86_ops->enable_nmi_window(vcpu);
		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
			kvm_x86_ops->enable_irq_window(vcpu);

		if (kvm_lapic_enabled(vcpu)) {
			update_cr8_intercept(vcpu);
			kvm_lapic_sync_to_vapic(vcpu);
		}
	}

	preempt_disable();

	kvm_x86_ops->prepare_guest_switch(vcpu);
	if (vcpu->fpu_active)
		kvm_load_guest_fpu(vcpu);
	kvm_load_guest_xcr0(vcpu);

	vcpu->mode = IN_GUEST_MODE;

	/*
	 * Full barrier between the store to vcpu->mode above and the reads of
	 * vcpu->mode / vcpu->requests in the check below.
	 * NOTE(review): presumably pairs with the code that posts requests and
	 * kicks vcpus - confirm against kvm_make_all_cpus_request().
	 */
	smp_mb();

	local_irq_disable();

	/* Last-chance check: abort the entry if anything needs attention. */
	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
	    || need_resched() || signal_pending(current)) {
		vcpu->mode = OUTSIDE_GUEST_MODE;
		smp_wmb();
		local_irq_enable();
		preempt_enable();
		kvm_x86_ops->cancel_injection(vcpu);
		r = 1;
		goto out;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

	kvm_guest_enter();

	if (unlikely(vcpu->arch.switch_db_regs)) {
		/* Clear DR7 first, then load the effective DR0-DR3 values. */
		set_debugreg(0, 7);
		set_debugreg(vcpu->arch.eff_db[0], 0);
		set_debugreg(vcpu->arch.eff_db[1], 1);
		set_debugreg(vcpu->arch.eff_db[2], 2);
		set_debugreg(vcpu->arch.eff_db[3], 3);
	}

	trace_kvm_entry(vcpu->vcpu_id);
	kvm_x86_ops->run(vcpu);

	/*
	 * Restore the host's hardware breakpoint state, but only when the host
	 * has breakpoints active.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();

	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);

	vcpu->mode = OUTSIDE_GUEST_MODE;
	smp_wmb();
	local_irq_enable();

	++vcpu->stat.exits;

	/*
	 * Compiler barrier: keeps the stat.exits increment above between
	 * local_irq_enable() and kvm_guest_exit().
	 * NOTE(review): presumably so at least one instruction runs with
	 * interrupts enabled before kvm_guest_exit() - confirm.
	 */
	barrier();

	kvm_guest_exit();

	preempt_enable();

	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	/* When KVM profiling is on, record the guest RIP at exit. */
	if (unlikely(prof_on == KVM_PROFILING)) {
		unsigned long rip = kvm_rip_read(vcpu);
		profile_hit(KVM_PROFILING, (void *)rip);
	}


	kvm_lapic_sync_from_vapic(vcpu);

	r = kvm_x86_ops->handle_exit(vcpu);
out:
	return r;
}
5801
5802
/*
 * Main vcpu execution loop.
 *
 * Resets the vcpu first if it is in KVM_MP_STATE_SIPI_RECEIVED, then
 * repeatedly either enters the guest (vcpu runnable and not APF-halted)
 * or blocks until it is unhalted.  The loop ends when r drops to 0 or
 * below: on a handler result <= 0, a pending signal, a userspace
 * interrupt-window request, or an unexpected mp_state after unhalt.
 *
 * The kvm SRCU read lock is held throughout, except while blocking or
 * rescheduling; its index is kept in vcpu->srcu_idx.
 *
 * Returns the final value of r (<= 0), or the error from
 * kvm_arch_vcpu_reset().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int r;
	struct kvm *kvm = vcpu->kvm;

	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
		pr_debug("vcpu %d received sipi with vector # %x\n",
			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
		kvm_lapic_reset(vcpu);
		r = kvm_arch_vcpu_reset(vcpu);
		if (r)
			return r;
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}

	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
	vapic_enter(vcpu);

	r = 1;
	while (r > 0) {
		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		    !vcpu->arch.apf.halted)
			r = vcpu_enter_guest(vcpu);
		else {
			/* Not runnable: sleep without holding the SRCU read lock. */
			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
			kvm_vcpu_block(vcpu);
			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
			{
				switch(vcpu->arch.mp_state) {
				case KVM_MP_STATE_HALTED:
					vcpu->arch.mp_state =
						KVM_MP_STATE_RUNNABLE;
					/* fall through: also clear the APF halt */
				case KVM_MP_STATE_RUNNABLE:
					vcpu->arch.apf.halted = false;
					break;
				case KVM_MP_STATE_SIPI_RECEIVED:
				default:
					r = -EINTR;
					break;
				}
			}
		}

		if (r <= 0)
			break;

		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
		if (kvm_cpu_has_pending_timer(vcpu))
			kvm_inject_pending_timer_irqs(vcpu);

		/* Userspace wants to inject an interrupt and the window is open. */
		if (dm_request_for_irq_injection(vcpu)) {
			r = -EINTR;
			vcpu->run->exit_reason = KVM_EXIT_INTR;
			++vcpu->stat.request_irq_exits;
		}

		kvm_check_async_pf_completion(vcpu);

		if (signal_pending(current)) {
			r = -EINTR;
			vcpu->run->exit_reason = KVM_EXIT_INTR;
			++vcpu->stat.signal_exits;
		}
		if (need_resched()) {
			/* Reschedule without holding the SRCU read lock. */
			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
			kvm_resched(vcpu);
			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
		}
	}

	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);

	vapic_exit(vcpu);

	return r;
}
5880
5881static int complete_mmio(struct kvm_vcpu *vcpu)
5882{
5883 struct kvm_run *run = vcpu->run;
5884 int r;
5885
5886 if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
5887 return 1;
5888
5889 if (vcpu->mmio_needed) {
5890 vcpu->mmio_needed = 0;
5891 if (!vcpu->mmio_is_write)
5892 memcpy(vcpu->mmio_data + vcpu->mmio_index,
5893 run->mmio.data, 8);
5894 vcpu->mmio_index += 8;
5895 if (vcpu->mmio_index < vcpu->mmio_size) {
5896 run->exit_reason = KVM_EXIT_MMIO;
5897 run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
5898 memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
5899 run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
5900 run->mmio.is_write = vcpu->mmio_is_write;
5901 vcpu->mmio_needed = 1;
5902 return 0;
5903 }
5904 if (vcpu->mmio_is_write)
5905 return 1;
5906 vcpu->mmio_read_completed = 1;
5907 }
5908 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5909 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5910 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5911 if (r != EMULATE_DONE)
5912 return 0;
5913 return 1;
5914}
5915
5916int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5917{
5918 int r;
5919 sigset_t sigsaved;
5920
5921 if (!tsk_used_math(current) && init_fpu(current))
5922 return -ENOMEM;
5923
5924 if (vcpu->sigset_active)
5925 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
5926
5927 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
5928 kvm_vcpu_block(vcpu);
5929 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
5930 r = -EAGAIN;
5931 goto out;
5932 }
5933
5934
5935 if (!irqchip_in_kernel(vcpu->kvm)) {
5936 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
5937 r = -EINVAL;
5938 goto out;
5939 }
5940 }
5941
5942 r = complete_mmio(vcpu);
5943 if (r <= 0)
5944 goto out;
5945
5946 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
5947 kvm_register_write(vcpu, VCPU_REGS_RAX,
5948 kvm_run->hypercall.ret);
5949
5950 r = __vcpu_run(vcpu);
5951
5952out:
5953 post_kvm_run_save(vcpu);
5954 if (vcpu->sigset_active)
5955 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
5956
5957 return r;
5958}
5959
5960int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5961{
5962 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
5963
5964
5965
5966
5967
5968
5969
5970 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5971 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
5972 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5973 }
5974 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5975 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5976 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5977 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5978 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5979 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
5980 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5981 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5982#ifdef CONFIG_X86_64
5983 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
5984 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
5985 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
5986 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
5987 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
5988 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
5989 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
5990 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
5991#endif
5992
5993 regs->rip = kvm_rip_read(vcpu);
5994 regs->rflags = kvm_get_rflags(vcpu);
5995
5996 return 0;
5997}
5998
5999int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6000{
6001 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6002 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6003
6004 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6005 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6006 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6007 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6008 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6009 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6010 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6011 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6012#ifdef CONFIG_X86_64
6013 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6014 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6015 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6016 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6017 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6018 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6019 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6020 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6021#endif
6022
6023 kvm_rip_write(vcpu, regs->rip);
6024 kvm_set_rflags(vcpu, regs->rflags);
6025
6026 vcpu->arch.exception.pending = false;
6027
6028 kvm_make_request(KVM_REQ_EVENT, vcpu);
6029
6030 return 0;
6031}
6032
6033void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6034{
6035 struct kvm_segment cs;
6036
6037 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6038 *db = cs.db;
6039 *l = cs.l;
6040}
6041EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6042
6043int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6044 struct kvm_sregs *sregs)
6045{
6046 struct desc_ptr dt;
6047
6048 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6049 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6050 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6051 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6052 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6053 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6054
6055 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6056 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6057
6058 kvm_x86_ops->get_idt(vcpu, &dt);
6059 sregs->idt.limit = dt.size;
6060 sregs->idt.base = dt.address;
6061 kvm_x86_ops->get_gdt(vcpu, &dt);
6062 sregs->gdt.limit = dt.size;
6063 sregs->gdt.base = dt.address;
6064
6065 sregs->cr0 = kvm_read_cr0(vcpu);
6066 sregs->cr2 = vcpu->arch.cr2;
6067 sregs->cr3 = kvm_read_cr3(vcpu);
6068 sregs->cr4 = kvm_read_cr4(vcpu);
6069 sregs->cr8 = kvm_get_cr8(vcpu);
6070 sregs->efer = vcpu->arch.efer;
6071 sregs->apic_base = kvm_get_apic_base(vcpu);
6072
6073 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6074
6075 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6076 set_bit(vcpu->arch.interrupt.nr,
6077 (unsigned long *)sregs->interrupt_bitmap);
6078
6079 return 0;
6080}
6081
6082int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6083 struct kvm_mp_state *mp_state)
6084{
6085 mp_state->mp_state = vcpu->arch.mp_state;
6086 return 0;
6087}
6088
6089int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6090 struct kvm_mp_state *mp_state)
6091{
6092 vcpu->arch.mp_state = mp_state->mp_state;
6093 kvm_make_request(KVM_REQ_EVENT, vcpu);
6094 return 0;
6095}
6096
6097int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
6098 bool has_error_code, u32 error_code)
6099{
6100 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6101 int ret;
6102
6103 init_emulate_ctxt(vcpu);
6104
6105 ret = emulator_task_switch(ctxt, tss_selector, reason,
6106 has_error_code, error_code);
6107
6108 if (ret)
6109 return EMULATE_FAIL;
6110
6111 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
6112 kvm_rip_write(vcpu, ctxt->eip);
6113 kvm_set_rflags(vcpu, ctxt->eflags);
6114 kvm_make_request(KVM_REQ_EVENT, vcpu);
6115 return EMULATE_DONE;
6116}
6117EXPORT_SYMBOL_GPL(kvm_task_switch);
6118
/*
 * KVM_SET_SREGS: load descriptor tables, control registers, EFER, APIC
 * base, an optional pending interrupt and all segment registers from
 * @sregs.  Always returns 0.
 *
 * The MMU context is reset when CR0, CR3, CR4 or EFER change, or when the
 * vcpu ends up in PAE mode outside long mode.  The return values of
 * kvm_set_cr8() and of the vendor setters are not checked here.
 */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int mmu_reset_needed = 0;
	int pending_vec, max_bits, idx;
	struct desc_ptr dt;

	/* Descriptor tables. */
	dt.size = sregs->idt.limit;
	dt.address = sregs->idt.base;
	kvm_x86_ops->set_idt(vcpu, &dt);
	dt.size = sregs->gdt.limit;
	dt.address = sregs->gdt.base;
	kvm_x86_ops->set_gdt(vcpu, &dt);

	/* CR2/CR3 are stored directly; CR3 is marked available in the cache. */
	vcpu->arch.cr2 = sregs->cr2;
	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
	vcpu->arch.cr3 = sregs->cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);

	kvm_set_cr8(vcpu, sregs->cr8);

	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
	kvm_x86_ops->set_efer(vcpu, sregs->efer);
	kvm_set_apic_base(vcpu, sregs->apic_base);

	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
	vcpu->arch.cr0 = sregs->cr0;

	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (sregs->cr4 & X86_CR4_OSXSAVE)
		update_cpuid(vcpu);

	/* PAE paging outside long mode: reload the PDPTRs from the new CR3. */
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
		mmu_reset_needed = 1;
	}
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (mmu_reset_needed)
		kvm_mmu_reset_context(vcpu);

	/* Queue the lowest vector set in the interrupt bitmap, if any. */
	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
	pending_vec = find_first_bit(
		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
	if (pending_vec < max_bits) {
		kvm_queue_interrupt(vcpu, pending_vec, false);
		pr_debug("Set back pending irq %d\n", pending_vec);
	}

	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	update_cr8_intercept(vcpu);

	/*
	 * A boot processor set to RIP 0xfff0 with CS selector 0xf000 / base
	 * 0xffff0000 outside protected mode is made runnable.
	 * NOTE(review): this matches the architectural reset state; presumably
	 * it covers userspace that resets a vcpu without unhalting it -
	 * confirm.
	 */
	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
	    !is_protmode(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	return 0;
}
6193
6194int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6195 struct kvm_guest_debug *dbg)
6196{
6197 unsigned long rflags;
6198 int i, r;
6199
6200 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6201 r = -EBUSY;
6202 if (vcpu->arch.exception.pending)
6203 goto out;
6204 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6205 kvm_queue_exception(vcpu, DB_VECTOR);
6206 else
6207 kvm_queue_exception(vcpu, BP_VECTOR);
6208 }
6209
6210
6211
6212
6213
6214 rflags = kvm_get_rflags(vcpu);
6215
6216 vcpu->guest_debug = dbg->control;
6217 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6218 vcpu->guest_debug = 0;
6219
6220 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6221 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6222 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6223 vcpu->arch.switch_db_regs =
6224 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
6225 } else {
6226 for (i = 0; i < KVM_NR_DB_REGS; i++)
6227 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6228 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
6229 }
6230
6231 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6232 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6233 get_segment_base(vcpu, VCPU_SREG_CS);
6234
6235
6236
6237
6238
6239 kvm_set_rflags(vcpu, rflags);
6240
6241 kvm_x86_ops->set_guest_debug(vcpu, dbg);
6242
6243 r = 0;
6244
6245out:
6246
6247 return r;
6248}
6249
6250
6251
6252
6253int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6254 struct kvm_translation *tr)
6255{
6256 unsigned long vaddr = tr->linear_address;
6257 gpa_t gpa;
6258 int idx;
6259
6260 idx = srcu_read_lock(&vcpu->kvm->srcu);
6261 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6262 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6263 tr->physical_address = gpa;
6264 tr->valid = gpa != UNMAPPED_GVA;
6265 tr->writeable = 1;
6266 tr->usermode = 0;
6267
6268 return 0;
6269}
6270
6271int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6272{
6273 struct i387_fxsave_struct *fxsave =
6274 &vcpu->arch.guest_fpu.state->fxsave;
6275
6276 memcpy(fpu->fpr, fxsave->st_space, 128);
6277 fpu->fcw = fxsave->cwd;
6278 fpu->fsw = fxsave->swd;
6279 fpu->ftwx = fxsave->twd;
6280 fpu->last_opcode = fxsave->fop;
6281 fpu->last_ip = fxsave->rip;
6282 fpu->last_dp = fxsave->rdp;
6283 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
6284
6285 return 0;
6286}
6287
6288int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6289{
6290 struct i387_fxsave_struct *fxsave =
6291 &vcpu->arch.guest_fpu.state->fxsave;
6292
6293 memcpy(fxsave->st_space, fpu->fpr, 128);
6294 fxsave->cwd = fpu->fcw;
6295 fxsave->swd = fpu->fsw;
6296 fxsave->twd = fpu->ftwx;
6297 fxsave->fop = fpu->last_opcode;
6298 fxsave->rip = fpu->last_ip;
6299 fxsave->rdp = fpu->last_dp;
6300 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6301
6302 return 0;
6303}
6304
6305int fx_init(struct kvm_vcpu *vcpu)
6306{
6307 int err;
6308
6309 err = fpu_alloc(&vcpu->arch.guest_fpu);
6310 if (err)
6311 return err;
6312
6313 fpu_finit(&vcpu->arch.guest_fpu);
6314
6315
6316
6317
6318 vcpu->arch.xcr0 = XSTATE_FP;
6319
6320 vcpu->arch.cr0 |= X86_CR0_ET;
6321
6322 return 0;
6323}
6324EXPORT_SYMBOL_GPL(fx_init);
6325
/* Release the guest FPU state storage of @vcpu via fpu_free(). */
static void fx_free(struct kvm_vcpu *vcpu)
{
	fpu_free(&vcpu->arch.guest_fpu);
}
6330
6331void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6332{
6333 if (vcpu->guest_fpu_loaded)
6334 return;
6335
6336
6337
6338
6339
6340
6341 kvm_put_guest_xcr0(vcpu);
6342 vcpu->guest_fpu_loaded = 1;
6343 unlazy_fpu(current);
6344 fpu_restore_checking(&vcpu->arch.guest_fpu);
6345 trace_kvm_fpu(1);
6346}
6347
6348void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6349{
6350 kvm_put_guest_xcr0(vcpu);
6351
6352 if (!vcpu->guest_fpu_loaded)
6353 return;
6354
6355 vcpu->guest_fpu_loaded = 0;
6356 fpu_save_init(&vcpu->arch.guest_fpu);
6357 ++vcpu->stat.fpu_reload;
6358 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6359 trace_kvm_fpu(0);
6360}
6361
/*
 * Free a vcpu: call kvmclock_reset(), release the wbinvd dirty cpumask
 * and the guest FPU state, then let the vendor backend free the vcpu.
 */
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
	kvmclock_reset(vcpu);

	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
	fx_free(vcpu);
	/* Must come last: the backend releases the vcpu itself. */
	kvm_x86_ops->vcpu_free(vcpu);
}
6370
6371struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6372 unsigned int id)
6373{
6374 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6375 printk_once(KERN_WARNING
6376 "kvm: SMP vm created on host with unstable TSC; "
6377 "guest TSC will not be reliable\n");
6378 return kvm_x86_ops->vcpu_create(kvm, id);
6379}
6380
6381int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6382{
6383 int r;
6384
6385 vcpu->arch.mtrr_state.have_fixed = 1;
6386 vcpu_load(vcpu);
6387 r = kvm_arch_vcpu_reset(vcpu);
6388 if (r == 0)
6389 r = kvm_mmu_setup(vcpu);
6390 vcpu_put(vcpu);
6391
6392 return r;
6393}
6394
/*
 * Destroy a vcpu: clear its async page fault MSR value, unload its MMU
 * while the vcpu is loaded, then free the guest FPU state and let the
 * vendor backend free the vcpu.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	vcpu->arch.apf.msr_val = 0;

	/* kvm_mmu_unload() is bracketed by vcpu_load()/vcpu_put(). */
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);

	fx_free(vcpu);
	kvm_x86_ops->vcpu_free(vcpu);
}
6406
/*
 * Reset the architecture-level state of @vcpu and then delegate to the
 * vendor backend.
 *
 * Returns the result of kvm_x86_ops->vcpu_reset().
 */
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
	/* Drop any queued, pending or injected NMI. */
	atomic_set(&vcpu->arch.nmi_queued, 0);
	vcpu->arch.nmi_pending = 0;
	vcpu->arch.nmi_injected = false;

	/* Debug registers: clear db[], set dr6/dr7 to their fixed-1 values. */
	vcpu->arch.switch_db_regs = 0;
	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
	vcpu->arch.dr6 = DR6_FIXED_1;
	vcpu->arch.dr7 = DR7_FIXED_1;

	kvm_make_request(KVM_REQ_EVENT, vcpu);
	/* Clear the apf and st MSR values. */
	vcpu->arch.apf.msr_val = 0;
	vcpu->arch.st.msr_val = 0;

	kvmclock_reset(vcpu);

	/* Async page fault bookkeeping starts from scratch. */
	kvm_clear_async_pf_completion_queue(vcpu);
	kvm_async_pf_hash_reset(vcpu);
	vcpu->arch.apf.halted = false;

	return kvm_x86_ops->vcpu_reset(vcpu);
}
6430
6431int kvm_arch_hardware_enable(void *garbage)
6432{
6433 struct kvm *kvm;
6434 struct kvm_vcpu *vcpu;
6435 int i;
6436
6437 kvm_shared_msr_cpu_online();
6438 list_for_each_entry(kvm, &vm_list, vm_list)
6439 kvm_for_each_vcpu(i, vcpu, kvm)
6440 if (vcpu->cpu == smp_processor_id())
6441 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6442 return kvm_x86_ops->hardware_enable(garbage);
6443}
6444
/*
 * Disable virtualization support through the vendor backend, then call
 * drop_user_return_notifiers().  @garbage is passed through unchanged.
 */
void kvm_arch_hardware_disable(void *garbage)
{
	kvm_x86_ops->hardware_disable(garbage);
	drop_user_return_notifiers(garbage);
}
6450
/* Forward to the vendor backend's hardware_setup() and return its result. */
int kvm_arch_hardware_setup(void)
{
	return kvm_x86_ops->hardware_setup();
}
6455
/* Forward to the vendor backend's hardware_unsetup(). */
void kvm_arch_hardware_unsetup(void)
{
	kvm_x86_ops->hardware_unsetup();
}
6460
/*
 * Forward to the vendor backend's check_processor_compatibility().
 * @rtn is passed through unchanged.
 * NOTE(review): presumably an out-parameter for the result - confirm.
 */
void kvm_arch_check_processor_compat(void *rtn)
{
	kvm_x86_ops->check_processor_compatibility(rtn);
}
6465
6466int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6467{
6468 struct page *page;
6469 struct kvm *kvm;
6470 int r;
6471
6472 BUG_ON(vcpu->kvm == NULL);
6473 kvm = vcpu->kvm;
6474
6475 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
6476 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
6477 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
6478 vcpu->arch.mmu.translate_gpa = translate_gpa;
6479 vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
6480 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
6481 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6482 else
6483 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
6484
6485 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6486 if (!page) {
6487 r = -ENOMEM;
6488 goto fail;
6489 }
6490 vcpu->arch.pio_data = page_address(page);
6491
6492 kvm_init_tsc_catchup(vcpu, max_tsc_khz);
6493
6494 r = kvm_mmu_create(vcpu);
6495 if (r < 0)
6496 goto fail_free_pio_data;
6497
6498 if (irqchip_in_kernel(kvm)) {
6499 r = kvm_create_lapic(vcpu);
6500 if (r < 0)
6501 goto fail_mmu_destroy;
6502 }
6503
6504 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6505 GFP_KERNEL);
6506 if (!vcpu->arch.mce_banks) {
6507 r = -ENOMEM;
6508 goto fail_free_lapic;
6509 }
6510 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
6511
6512 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
6513 goto fail_free_mce_banks;
6514
6515 kvm_async_pf_hash_reset(vcpu);
6516
6517 return 0;
6518fail_free_mce_banks:
6519 kfree(vcpu->arch.mce_banks);
6520fail_free_lapic:
6521 kvm_free_lapic(vcpu);
6522fail_mmu_destroy:
6523 kvm_mmu_destroy(vcpu);
6524fail_free_pio_data:
6525 free_page((unsigned long)vcpu->arch.pio_data);
6526fail:
6527 return r;
6528}
6529
/*
 * Release per-vcpu resources: the MCE bank array, the local APIC, the
 * MMU and the PIO data page.
 */
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	int idx;

	kfree(vcpu->arch.mce_banks);
	kvm_free_lapic(vcpu);
	/* kvm_mmu_destroy() runs inside an SRCU read-side section. */
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_mmu_destroy(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	free_page((unsigned long)vcpu->arch.pio_data);
}
6541
/*
 * Architecture-specific initialisation of a new VM: set up the active
 * MMU page and assigned device lists, mark the userspace IRQ source id
 * as taken, and initialise the TSC write lock.
 *
 * Always returns 0.
 */
int kvm_arch_init_vm(struct kvm *kvm)
{
	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);

	/* Reserve KVM_USERSPACE_IRQ_SOURCE_ID in the irq source bitmap. */
	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);

	raw_spin_lock_init(&kvm->arch.tsc_write_lock);

	return 0;
}
6554
/* Unload the MMU of @vcpu, bracketed by vcpu_load()/vcpu_put(). */
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
}
6561
6562static void kvm_free_vcpus(struct kvm *kvm)
6563{
6564 unsigned int i;
6565 struct kvm_vcpu *vcpu;
6566
6567
6568
6569
6570 kvm_for_each_vcpu(i, vcpu, kvm) {
6571 kvm_clear_async_pf_completion_queue(vcpu);
6572 kvm_unload_vcpu_mmu(vcpu);
6573 }
6574 kvm_for_each_vcpu(i, vcpu, kvm)
6575 kvm_arch_vcpu_free(vcpu);
6576
6577 mutex_lock(&kvm->lock);
6578 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
6579 kvm->vcpus[i] = NULL;
6580
6581 atomic_set(&kvm->online_vcpus, 0);
6582 mutex_unlock(&kvm->lock);
6583}
6584
/* Free all assigned devices of @kvm, then free its PIT. */
void kvm_arch_sync_events(struct kvm *kvm)
{
	kvm_free_all_assigned_devices(kvm);
	kvm_free_pit(kvm);
}
6590
/*
 * Architecture-specific VM teardown: unmap the guest from the IOMMU,
 * free the virtual PIC and IOAPIC, destroy all vcpus, and drop the
 * references on the APIC access page and the EPT identity page table
 * if they were set.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_iommu_unmap_guest(kvm);
	kfree(kvm->arch.vpic);
	kfree(kvm->arch.vioapic);
	kvm_free_vcpus(kvm);
	/* Both pages are optional; only put the ones that exist. */
	if (kvm->arch.apic_access_page)
		put_page(kvm->arch.apic_access_page);
	if (kvm->arch.ept_identity_pagetable)
		put_page(kvm->arch.ept_identity_pagetable);
}
6602
6603int kvm_arch_prepare_memory_region(struct kvm *kvm,
6604 struct kvm_memory_slot *memslot,
6605 struct kvm_memory_slot old,
6606 struct kvm_userspace_memory_region *mem,
6607 int user_alloc)
6608{
6609 int npages = memslot->npages;
6610 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6611
6612
6613 if (memslot->id >= KVM_MEMORY_SLOTS)
6614 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6615
6616
6617
6618
6619 if (!user_alloc) {
6620 if (npages && !old.rmap) {
6621 unsigned long userspace_addr;
6622
6623 down_write(¤t->mm->mmap_sem);
6624 userspace_addr = do_mmap(NULL, 0,
6625 npages * PAGE_SIZE,
6626 PROT_READ | PROT_WRITE,
6627 map_flags,
6628 0);
6629 up_write(¤t->mm->mmap_sem);
6630
6631 if (IS_ERR((void *)userspace_addr))
6632 return PTR_ERR((void *)userspace_addr);
6633
6634 memslot->userspace_addr = userspace_addr;
6635 }
6636 }
6637
6638
6639 return 0;
6640}
6641
6642void kvm_arch_commit_memory_region(struct kvm *kvm,
6643 struct kvm_userspace_memory_region *mem,
6644 struct kvm_memory_slot old,
6645 int user_alloc)
6646{
6647
6648 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6649
6650 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6651 int ret;
6652
6653 down_write(¤t->mm->mmap_sem);
6654 ret = do_munmap(current->mm, old.userspace_addr,
6655 old.npages * PAGE_SIZE);
6656 up_write(¤t->mm->mmap_sem);
6657 if (ret < 0)
6658 printk(KERN_WARNING
6659 "kvm_vm_ioctl_set_memory_region: "
6660 "failed to munmap memory\n");
6661 }
6662
6663 if (!kvm->arch.n_requested_mmu_pages)
6664 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6665
6666 spin_lock(&kvm->mmu_lock);
6667 if (nr_mmu_pages)
6668 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6669 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6670 spin_unlock(&kvm->mmu_lock);
6671}
6672
/* Zap all MMU pages of @kvm, then request an MMU reload on its vcpus. */
void kvm_arch_flush_shadow(struct kvm *kvm)
{
	kvm_mmu_zap_all(kvm);
	kvm_reload_remote_mmus(kvm);
}
6678
6679int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6680{
6681 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6682 !vcpu->arch.apf.halted)
6683 || !list_empty_careful(&vcpu->async_pf.done)
6684 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6685 || atomic_read(&vcpu->arch.nmi_queued) ||
6686 (kvm_arch_interrupt_allowed(vcpu) &&
6687 kvm_cpu_has_interrupt(vcpu));
6688}
6689
6690void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
6691{
6692 int me;
6693 int cpu = vcpu->cpu;
6694
6695 if (waitqueue_active(&vcpu->wq)) {
6696 wake_up_interruptible(&vcpu->wq);
6697 ++vcpu->stat.halt_wakeup;
6698 }
6699
6700 me = get_cpu();
6701 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
6702 if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
6703 smp_send_reschedule(cpu);
6704 put_cpu();
6705}
6706
/* Return the vendor backend's interrupt_allowed() result for @vcpu. */
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops->interrupt_allowed(vcpu);
}
6711
6712bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
6713{
6714 unsigned long current_rip = kvm_rip_read(vcpu) +
6715 get_segment_base(vcpu, VCPU_SREG_CS);
6716
6717 return current_rip == linear_rip;
6718}
6719EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
6720
6721unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
6722{
6723 unsigned long rflags;
6724
6725 rflags = kvm_x86_ops->get_rflags(vcpu);
6726 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6727 rflags &= ~X86_EFLAGS_TF;
6728 return rflags;
6729}
6730EXPORT_SYMBOL_GPL(kvm_get_rflags);
6731
6732void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
6733{
6734 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
6735 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
6736 rflags |= X86_EFLAGS_TF;
6737 kvm_x86_ops->set_rflags(vcpu, rflags);
6738 kvm_make_request(KVM_REQ_EVENT, vcpu);
6739}
6740EXPORT_SYMBOL_GPL(kvm_set_rflags);
6741
6742void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
6743{
6744 int r;
6745
6746 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
6747 is_error_page(work->page))
6748 return;
6749
6750 r = kvm_mmu_reload(vcpu);
6751 if (unlikely(r))
6752 return;
6753
6754 if (!vcpu->arch.mmu.direct_map &&
6755 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
6756 return;
6757
6758 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
6759}
6760
/*
 * Hash the low 32 bits of @gfn into order_base_2(ASYNC_PF_PER_VCPU) bits
 * using hash_32(); the result is the home slot in the apf.gfns table.
 */
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}
6765
/*
 * Return the slot following @key, wrapping around at
 * roundup_pow_of_two(ASYNC_PF_PER_VCPU).
 */
static inline u32 kvm_async_pf_next_probe(u32 key)
{
	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}
6770
6771static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6772{
6773 u32 key = kvm_async_pf_hash_fn(gfn);
6774
6775 while (vcpu->arch.apf.gfns[key] != ~0)
6776 key = kvm_async_pf_next_probe(key);
6777
6778 vcpu->arch.apf.gfns[key] = gfn;
6779}
6780
6781static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
6782{
6783 int i;
6784 u32 key = kvm_async_pf_hash_fn(gfn);
6785
6786 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
6787 (vcpu->arch.apf.gfns[key] != gfn &&
6788 vcpu->arch.apf.gfns[key] != ~0); i++)
6789 key = kvm_async_pf_next_probe(key);
6790
6791 return key;
6792}
6793
/*
 * Return true when @gfn is present in the vcpu's async PF gfn table,
 * i.e. the slot found by kvm_async_pf_gfn_slot() actually holds @gfn.
 */
bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}
6798
/*
 * Remove @gfn from the vcpu's async PF gfn table and close the gap it
 * leaves in the probe sequence.
 *
 * i is the slot being emptied, j scans the slots after it, and k is the
 * home (hash) slot of the entry found at j.
 */
static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 i, j, k;

	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
	while (true) {
		/* Mark slot i as empty. */
		vcpu->arch.apf.gfns[i] = ~0;
		do {
			j = kvm_async_pf_next_probe(j);
			/* An empty slot ends the probe run: nothing left to move. */
			if (vcpu->arch.apf.gfns[j] == ~0)
				return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * Keep scanning while k lies cyclically in (i, j]:
			 * such an entry is still reachable from its home
			 * slot without passing through the hole at i.
			 * The second arm of the condition handles the case
			 * where the range wraps around the end of the table.
			 */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		/* Move entry j into the hole; j becomes the new hole. */
		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
		i = j;
	}
}
6821
/*
 * Write the 32-bit value @val to the guest through the cached location
 * vcpu->arch.apf.data.  Returns the result of kvm_write_guest_cached();
 * callers in this file treat 0 as success.
 */
static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
				      sizeof(val));
}
6828
6829void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
6830 struct kvm_async_pf *work)
6831{
6832 struct x86_exception fault;
6833
6834 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
6835 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
6836
6837 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
6838 (vcpu->arch.apf.send_user_only &&
6839 kvm_x86_ops->get_cpl(vcpu) == 0))
6840 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
6841 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
6842 fault.vector = PF_VECTOR;
6843 fault.error_code_valid = true;
6844 fault.error_code = 0;
6845 fault.nested_page_fault = false;
6846 fault.address = work->arch.token;
6847 kvm_inject_page_fault(vcpu, &fault);
6848 }
6849}
6850
6851void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
6852 struct kvm_async_pf *work)
6853{
6854 struct x86_exception fault;
6855
6856 trace_kvm_async_pf_ready(work->arch.token, work->gva);
6857 if (is_error_page(work->page))
6858 work->arch.token = ~0;
6859 else
6860 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
6861
6862 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
6863 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
6864 fault.vector = PF_VECTOR;
6865 fault.error_code_valid = true;
6866 fault.error_code = 0;
6867 fault.nested_page_fault = false;
6868 fault.address = work->arch.token;
6869 kvm_inject_page_fault(vcpu, &fault);
6870 }
6871 vcpu->arch.apf.halted = false;
6872}
6873
6874bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
6875{
6876 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
6877 return true;
6878 else
6879 return !kvm_event_needs_reinjection(vcpu) &&
6880 kvm_x86_ops->interrupt_allowed(vcpu);
6881}
6882
/*
 * Export the following KVM tracepoints (GPL-only).
 * NOTE(review): presumably for use by the vendor modules - confirm.
 */
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
6895