linux/arch/powerpc/kvm/book3s_hv_p9_entry.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        u64 tb = mftb() - vc->tb_offset_applied;

        vcpu->arch.cur_activity = next;
        vcpu->arch.cur_tb_start = tb;
}

static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        struct kvmhv_tb_accumulator *curr;
        u64 tb = mftb() - vc->tb_offset_applied;
        u64 prev_tb;
        u64 delta;
        u64 seq;

        curr = vcpu->arch.cur_activity;
        vcpu->arch.cur_activity = next;
        prev_tb = vcpu->arch.cur_tb_start;
        vcpu->arch.cur_tb_start = tb;

        if (!curr)
                return;

        delta = tb - prev_tb;

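        /*
         * Open-coded seqcount: bump to an odd value before updating the
         * totals and back to an even value afterwards, so a reader that
         * samples seqcount around the fields can detect a torn update.
         */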
        seq = curr->seqcount;
        curr->seqcount = seq + 1;
        smp_wmb();
        curr->tb_total += delta;
        if (seq == 0 || delta < curr->tb_min)
                curr->tb_min = delta;
        if (delta > curr->tb_max)
                curr->tb_max = delta;
        smp_wmb();
        curr->seqcount = seq + 2;
}

#define start_timing(vcpu, next) __start_timing(vcpu, next)
#define end_timing(vcpu) __start_timing(vcpu, NULL)
#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
#else
#define start_timing(vcpu, next) do {} while (0)
#define end_timing(vcpu) do {} while (0)
#define accumulate_time(vcpu, next) do {} while (0)
#endif

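/* Raw SLB accessors: slbmfee/slbmfev read back an entry, slbmte writes one. */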
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
{
        asm volatile("slbmfev  %0,%1" : "=r" (*slbev) : "r" (idx));
        asm volatile("slbmfee  %0,%1" : "=r" (*slbee) : "r" (idx));
}

static inline void mtslb(u64 slbee, u64 slbev)
{
        asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
}

static inline void clear_slb_entry(unsigned int idx)
{
        mtslb(idx, 0);
}

static inline void slb_clear_invalidate_partition(void)
{
        clear_slb_entry(0);
        asm volatile(PPC_SLBIA(6));
}

/*
 * Malicious or buggy radix guests may have inserted SLB entries
 * (only 0..3 because radix always runs with UPRT=1), so these must
 * be cleared here to avoid side-channels. slbmte is used rather
 * than slbia, as it won't clear cached translations.
 */
static void radix_clear_slb(void)
{
        int i;

        for (i = 0; i < 4; i++)
                clear_slb_entry(i);
}

static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
        struct kvm_nested_guest *nested = vcpu->arch.nested;
        u32 lpid;

        lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;

        /*
         * All the isync()s are overkill but trivially follow the ISA
         * requirements. Some could likely be removed and replaced with a
         * comment justifying why they are not needed.
         */
        isync();
        mtspr(SPRN_LPID, lpid);
        isync();
        mtspr(SPRN_LPCR, lpcr);
        isync();
        mtspr(SPRN_PID, vcpu->arch.pid);
        isync();
}

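/*
 * Hash guest: set the guest partition, LPCR and PID, then reload the
 * guest SLB entries that were saved off at the last exit.
 */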
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
        u32 lpid;
        int i;

        lpid = kvm->arch.lpid;

        mtspr(SPRN_LPID, lpid);
        mtspr(SPRN_LPCR, lpcr);
        mtspr(SPRN_PID, vcpu->arch.pid);

        for (i = 0; i < vcpu->arch.slb_max; i++)
                mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);

        isync();
}

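/*
 * Restore the host translation context: host PID, LPID and LPCR, and
 * for a hash host also the bolted SLB entries.
 */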
static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
        isync();
        mtspr(SPRN_PID, pid);
        isync();
        mtspr(SPRN_LPID, kvm->arch.host_lpid);
        isync();
        mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
        isync();

        if (!radix_enabled())
                slb_restore_bolted_realmode();
}

static void save_clear_host_mmu(struct kvm *kvm)
{
        if (!radix_enabled()) {
                /*
                 * Hash host could save and restore host SLB entries to
                 * reduce SLB fault overheads of VM exits, but for now the
                 * existing code clears all entries and restores just the
                 * bolted ones when switching back to host.
                 */
                slb_clear_invalidate_partition();
        }
}

static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
        if (kvm_is_radix(kvm)) {
                radix_clear_slb();
        } else {
                int i;
                int nr = 0;

                /*
                 * This must run before switching to host (radix host can't
                 * access all SLBs).
                 */
                for (i = 0; i < vcpu->arch.slb_nr; i++) {
                        u64 slbee, slbev;
                        mfslb(i, &slbee, &slbev);
                        if (slbee & SLB_ESID_V) {
                                vcpu->arch.slb[nr].orige = slbee | i;
                                vcpu->arch.slb[nr].origv = slbev;
                                nr++;
                        }
                }
                vcpu->arch.slb_max = nr;
                slb_clear_invalidate_partition();
        }
}

int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
{
        struct kvm *kvm = vcpu->kvm;
        struct kvm_nested_guest *nested = vcpu->arch.nested;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        s64 hdec;
        u64 tb, purr, spurr;
        u64 *exsave;
        bool ri_set;
        int trap;
        unsigned long msr;
        unsigned long host_hfscr;
        unsigned long host_ciabr;
        unsigned long host_dawr0;
        unsigned long host_dawrx0;
        unsigned long host_psscr;
        unsigned long host_pidr;
        unsigned long host_dawr1;
        unsigned long host_dawrx1;

        hdec = time_limit - mftb();
        if (hdec < 0)
                return BOOK3S_INTERRUPT_HV_DECREMENTER;

        WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
        WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));

        start_timing(vcpu, &vcpu->arch.rm_entry);

        vcpu->arch.ceded = 0;

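        /*
         * Apply the guest timebase offset. A TBU40 write sets only the
         * upper 40 bits of the timebase; if the free-running low 24 bits
         * carried into bit 24 across the update, that carry was lost and
         * the upper bits must be bumped by one.
         */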
        if (vc->tb_offset) {
                u64 new_tb = mftb() + vc->tb_offset;
                mtspr(SPRN_TBU40, new_tb);
                tb = mftb();
                if ((tb & 0xffffff) < (new_tb & 0xffffff))
                        mtspr(SPRN_TBU40, new_tb + 0x1000000);
                vc->tb_offset_applied = vc->tb_offset;
        }

        msr = mfmsr();

        host_hfscr = mfspr(SPRN_HFSCR);
        host_ciabr = mfspr(SPRN_CIABR);
        host_dawr0 = mfspr(SPRN_DAWR0);
        host_dawrx0 = mfspr(SPRN_DAWRX0);
        host_psscr = mfspr(SPRN_PSSCR);
        host_pidr = mfspr(SPRN_PID);
        if (cpu_has_feature(CPU_FTR_DAWR1)) {
                host_dawr1 = mfspr(SPRN_DAWR1);
                host_dawrx1 = mfspr(SPRN_DAWRX1);
        }

        if (vc->pcr)
                mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
        mtspr(SPRN_DPDES, vc->dpdes);
        mtspr(SPRN_VTB, vc->vtb);

        local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
        local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
        mtspr(SPRN_PURR, vcpu->arch.purr);
        mtspr(SPRN_SPURR, vcpu->arch.spurr);

        if (dawr_enabled()) {
                mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
                mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
                if (cpu_has_feature(CPU_FTR_DAWR1)) {
                        mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
                        mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
                }
        }
        mtspr(SPRN_CIABR, vcpu->arch.ciabr);
        mtspr(SPRN_IC, vcpu->arch.ic);

        mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
              (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));

        mtspr(SPRN_HFSCR, vcpu->arch.hfscr);

        mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
        mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);

        /*
         * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
         * Interrupt (HDSI) the HDSISR is not updated at all.
         *
         * To work around this we put a canary value into the HDSISR before
         * returning to a guest and then check for this canary when we take an
         * HDSI. If we find the canary on an HDSI, we know the hardware didn't
         * update the HDSISR. In this case we return to the guest to retake the
         * HDSI, which should update the HDSISR correctly on the second attempt.
         *
         * Just do this on all p9 processors for now.
         */
        mtspr(SPRN_HDSISR, HDSISR_CANARY);

        mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
        mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
        mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
        mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);

        mtspr(SPRN_AMOR, ~0UL);

        local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;

        /*
         * Hash host, hash guest, or radix guest with prefetch bug, all have
         * to disable the MMU before switching to guest MMU state.
         */
        if (!radix_enabled() || !kvm_is_radix(kvm) ||
                        cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
                __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

        save_clear_host_mmu(kvm);

        if (kvm_is_radix(kvm)) {
                switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
                if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
                        __mtmsrd(0, 1); /* clear RI */

        } else {
                switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
        }

        /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
        kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);

        /*
         * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
         * so set guest LPCR (with HDICE) before writing HDEC.
         */
        mtspr(SPRN_HDEC, hdec);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_return_to_guest:
#endif
        mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
        mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
        mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
        mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);

        accumulate_time(vcpu, &vcpu->arch.guest_time);

        kvmppc_p9_enter_guest(vcpu);

        accumulate_time(vcpu, &vcpu->arch.rm_intr);

        /* XXX: Could get these from r11/12 and paca exsave instead */
        vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
        vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
        vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
        vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);

        /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
        trap = local_paca->kvm_hstate.scratch0 & ~0x2;

        /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
        ri_set = false;
        if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
                if (trap != BOOK3S_INTERRUPT_SYSCALL &&
                                (vcpu->arch.shregs.msr & MSR_RI))
                        ri_set = true;
                exsave = local_paca->exgen;
        } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
                exsave = local_paca->exnmi;
        } else { /* trap == 0x200 */
                exsave = local_paca->exmc;
        }

        vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
        vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;

        /*
         * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
         * and hstate scratch (which we need to move into exsave to make
         * re-entrant vs SRESET/MCE)
         */
        if (ri_set) {
                if (unlikely(!(mfmsr() & MSR_RI))) {
                        __mtmsrd(MSR_RI, 1);
                        WARN_ON_ONCE(1);
                }
        } else {
                WARN_ON_ONCE(mfmsr() & MSR_RI);
                __mtmsrd(MSR_RI, 1);
        }

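        /* Pull the GPRs and other state the interrupt vector stashed in the paca exsave area. */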
        vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
        vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
        vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
        vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
        vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
        vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
        vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
        vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];

        vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;

        if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
                vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
                vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
                kvmppc_realmode_machine_check(vcpu);

        } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
                kvmppc_realmode_hmi_handler();

        } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
                vcpu->arch.emul_inst = mfspr(SPRN_HEIR);

        } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
                vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
                vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
                vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);

        } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
                vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);

        } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
                vcpu->arch.hfscr = mfspr(SPRN_HFSCR);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        /*
         * Softpatch interrupt for transactional memory emulation cases
         * on POWER9 DD2.2.  This is early in the guest exit path - we
         * haven't saved registers or done a treclaim yet.
         */
        } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
                vcpu->arch.emul_inst = mfspr(SPRN_HEIR);

                /*
                 * The cases we want to handle here are those where the guest
                 * is in real suspend mode and is trying to transition to
                 * transactional mode.
                 */
                if (!local_paca->kvm_hstate.fake_suspend &&
                                (vcpu->arch.shregs.msr & MSR_TS_S)) {
                        if (kvmhv_p9_tm_emulation_early(vcpu)) {
                                /*
                                 * Go straight back into the guest with the
                                 * new NIP/MSR as set by TM emulation.
                                 */
                                mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
                                mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);

                                /*
                                 * tm_return_to_guest re-loads SRR0/1, DAR,
                                 * DSISR after RI is cleared, in case they had
                                 * been clobbered by a MCE.
                                 */
                                __mtmsrd(0, 1); /* clear RI */
                                goto tm_return_to_guest;
                        }
                }
#endif
        }

        accumulate_time(vcpu, &vcpu->arch.rm_exit);

        /* Advance host PURR/SPURR by the amount used by guest */
        purr = mfspr(SPRN_PURR);
        spurr = mfspr(SPRN_SPURR);
        mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
              purr - vcpu->arch.purr);
        mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
              spurr - vcpu->arch.spurr);
        vcpu->arch.purr = purr;
        vcpu->arch.spurr = spurr;

        vcpu->arch.ic = mfspr(SPRN_IC);
        vcpu->arch.pid = mfspr(SPRN_PID);
        vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;

        vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
        vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
        vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
        vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);

        /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
        mtspr(SPRN_PSSCR, host_psscr |
              (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
        mtspr(SPRN_HFSCR, host_hfscr);
        mtspr(SPRN_CIABR, host_ciabr);
        mtspr(SPRN_DAWR0, host_dawr0);
        mtspr(SPRN_DAWRX0, host_dawrx0);
        if (cpu_has_feature(CPU_FTR_DAWR1)) {
                mtspr(SPRN_DAWR1, host_dawr1);
                mtspr(SPRN_DAWRX1, host_dawrx1);
        }

        if (kvm_is_radix(kvm)) {
                /*
                 * Since this is radix, do an eieio; tlbsync; ptesync sequence
                 * in case we interrupted the guest between a tlbie and a
                 * ptesync.
                 */
                asm volatile("eieio; tlbsync; ptesync");
        }

        /*
         * cp_abort is required if the processor supports local copy-paste
         * to clear the copy buffer that was under control of the guest.
         */
        if (cpu_has_feature(CPU_FTR_ARCH_31))
                asm volatile(PPC_CP_ABORT);

        vc->dpdes = mfspr(SPRN_DPDES);
        vc->vtb = mfspr(SPRN_VTB);
        mtspr(SPRN_DPDES, 0);
        if (vc->pcr)
                mtspr(SPRN_PCR, PCR_MASK);

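        /*
         * Back out the guest timebase offset, with the same TBU40
         * low-bits carry handling as on the way in.
         */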
        if (vc->tb_offset_applied) {
                u64 new_tb = mftb() - vc->tb_offset_applied;
                mtspr(SPRN_TBU40, new_tb);
                tb = mftb();
                if ((tb & 0xffffff) < (new_tb & 0xffffff))
                        mtspr(SPRN_TBU40, new_tb + 0x1000000);
                vc->tb_offset_applied = 0;
        }

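        /* Push HDEC far into the future so it does not fire again while back in the host. */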
        mtspr(SPRN_HDEC, 0x7fffffff);

        save_clear_guest_mmu(kvm, vcpu);
        switch_mmu_to_host(kvm, host_pidr);
        local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;

        /*
         * If we are in real mode, only switch MMU on after the MMU is
         * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
         */
        if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
            vcpu->arch.shregs.msr & MSR_TS_MASK)
                msr |= MSR_TS_S;

        __mtmsrd(msr, 0);

        end_timing(vcpu);

        return trap;
}
EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);