linux/arch/mips/kvm/mmu.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * KVM/MIPS MMU handling in the KVM module.
 *
 * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
 * Authors: Sanjay Lal <sanjayl@kymasys.com>
 */

#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table levels below the
 * top-level PGD that may need to be allocated when handling a fault, and hence
 * the minimum number of pages to keep cached: just a PTE table when the PMD
 * level is folded, otherwise a PMD table and a PTE table.
 */
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

/**
 * kvm_pgd_init() - Initialise KVM GPA page directory.
 * @page:	Pointer to page directory (PGD) for KVM GPA.
 *
 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
 * representing no mappings. This is similar to pgd_init(), however it
 * initialises all the page directory pointers, not just the ones corresponding
 * to the userland address space (since it is for the guest physical address
 * space rather than a virtual address space).
 */
static void kvm_pgd_init(void *page)
{
	unsigned long *p, *end;
	unsigned long entry;

#ifdef __PAGETABLE_PMD_FOLDED
	entry = (unsigned long)invalid_pte_table;
#else
	entry = (unsigned long)invalid_pmd_table;
#endif

	p = (unsigned long *)page;
	end = p + PTRS_PER_PGD;

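	/*
	 * The loop below stores eight entries per iteration (p[0]..p[4],
	 * then p[-3]..p[-1] after advancing p by 8). This unrolling relies
	 * on PTRS_PER_PGD being a multiple of 8, which holds for the
	 * power-of-two table sizes MIPS uses.
	 */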
	do {
		p[0] = entry;
		p[1] = entry;
		p[2] = entry;
		p[3] = entry;
		p[4] = entry;
		p += 8;
		p[-3] = entry;
		p[-2] = entry;
		p[-1] = entry;
	} while (p != end);
}

/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
 * to host physical page mappings.
 *
 * Returns:	Pointer to new KVM GPA page directory.
 *		NULL on allocation failure.
 */
pgd_t *kvm_pgd_alloc(void)
{
	pgd_t *ret;

	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
	if (ret)
		kvm_pgd_init(ret);

	return ret;
}

/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:	Page directory pointer.
 * @addr:	Address to index page table using.
 * @cache:	MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns:	Pointer to pte_t corresponding to @addr.
 *		NULL if a page table doesn't exist for @addr and !@cache.
 *		NULL if a page table allocation failed.
 */
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd += pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used on MIPS yet */
		BUG();
		return NULL;
	}
	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		pmd_t *new_pmd;

		if (!cache)
			return NULL;
		new_pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)new_pmd,
			 (unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, new_pmd);
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		pte_t *new_pte;

		if (!cache)
			return NULL;
		new_pte = kvm_mmu_memory_cache_alloc(cache);
		clear_page(new_pte);
		pmd_populate_kernel(NULL, pmd, new_pte);
	}
	return pte_offset_kernel(pmd, addr);
}

/* Caller must hold @kvm->mmu_lock */
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
				   struct kvm_mmu_memory_cache *cache,
				   unsigned long addr)
{
	return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}
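
/*
 * A minimal lookup sketch (an illustration, not code from this file): passing
 * a NULL cache makes the walk read-only, so an existing mapping can be probed
 * under mmu_lock like so:
 *
 *	pte_t *ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 *	if (ptep && pte_present(*ptep))
 *		... use *ptep ...
 *
 * This is exactly the pattern the fast-path fault handler below uses.
 */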

/*
 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page tables.
 */

static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	int i_min = pte_index(start_gpa);
	int i_max = pte_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gpa);
	int i_max = pmd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gpa);
	int i_max = pud_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

/**
 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether it's safe to remove the top level page directory because
 *		all lower levels have been removed.
 */
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
				      start_gfn << PAGE_SHIFT,
				      end_gfn << PAGE_SHIFT);
}
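
/*
 * Usage sketch (not code from this file): flushing the entire GPA space, as
 * done when a VM is torn down, amounts to a single call under mmu_lock,
 * kvm_mips_flush_gpa_pt(kvm, 0, ~0), after which the return value indicates
 * that the top-level PGD no longer holds any lower-level tables.
 */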

#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	int i_min = pte_index(start);					\
	int i_max = pte_index(end);					\
	int i;								\
	pte_t old, new;							\
									\
	for (i = i_min; i <= i_max; ++i) {				\
		if (!pte_present(pte[i]))				\
			continue;					\
									\
		old = pte[i];						\
		new = op(old);						\
		if (pte_val(new) == pte_val(old))			\
			continue;					\
		set_pte(pte + i, new);					\
		ret = 1;						\
	}								\
	return ret;							\
}									\
									\
/* returns true if anything was done */					\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pte_t *pte;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pmd_index(start);					\
	int i_max = pmd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pmd_present(pmd[i]))				\
			continue;					\
									\
		pte = pte_offset_kernel(pmd + i, 0);			\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pte(pte, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pmd_t *pmd;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pud_index(start);					\
	int i_max = pud_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pud_present(pud[i]))				\
			continue;					\
									\
		pmd = pmd_offset(pud + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	p4d_t *p4d;							\
	pud_t *pud;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pgd_index(start);					\
	int i_max = pgd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pgd_present(pgd[i]))				\
			continue;					\
									\
		p4d = p4d_offset(pgd, 0);				\
		pud = pud_offset(p4d + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pud(pud, start, cur_end);	\
	}								\
	return ret;							\
}
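
/*
 * BUILD_PTE_RANGE_OP(name, op) expands to a four-level page table walker,
 * kvm_mips_##name##_{pte,pmd,pud,pgd}(), which applies @op to every present
 * PTE in a range and returns whether any PTE was actually changed. It is
 * instantiated below for pte_mkclean() (dirty page logging) and pte_mkold()
 * (idle page tracking).
 */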

/*
 * kvm_mips_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the VM's
 * GPA page table to allow dirty page tracking.
 */

BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)

/**
 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether any GPA mappings were modified, which would require
 *		derived mappings (GVA page tables & TLB entries) to be
 *		invalidated.
 */
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
				    start_gfn << PAGE_SHIFT,
				    end_gfn << PAGE_SHIFT);
}

/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write-protects the associated PTEs. The
 * caller must acquire @kvm->mmu_lock.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	gfn_t base_gfn = slot->base_gfn + gfn_offset;
	gfn_t start = base_gfn + __ffs(mask);
	gfn_t end = base_gfn + __fls(mask);

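	/*
	 * Worked example: mask 0x3c gives start = base_gfn + 2 and
	 * end = base_gfn + 5. The whole [start, end] span is cleaned, so
	 * pages whose mask bit is clear may be write-protected too; that is
	 * harmless, as they simply take a write fault and are re-dirtied.
	 */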
	kvm_mips_mkclean_gpa_pt(kvm, start, end);
}

/*
 * kvm_mips_mkold_gpa_pt.
 * Mark a range of guest physical address space old (all accesses fault) in the
 * VM's GPA page table to allow detection of commonly used pages.
 */

BUILD_PTE_RANGE_OP(mkold, pte_mkold)

static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
				 gfn_t end_gfn)
{
	return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
				  start_gfn << PAGE_SHIFT,
				  end_gfn << PAGE_SHIFT);
}

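/*
 * The kvm_gfn_range handlers below back the generic KVM MMU notifier hooks:
 * they are invoked when the host virtual mappings backing guest memory are
 * unmapped, changed, or aged, and update the GPA page tables accordingly.
 */
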
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
	return true;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t hva_pte = range->pte;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	pte_t old_pte;

	if (!gpa_pte)
		return false;

	/* Mapping may need adjusting depending on memslot flags */
	old_pte = *gpa_pte;
	if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
		hva_pte = pte_mkclean(hva_pte);
	else if (range->slot->flags & KVM_MEM_READONLY)
		hva_pte = pte_wrprotect(hva_pte);

	set_pte(gpa_pte, hva_pte);

	/* Replacing an absent or old page doesn't need flushes */
	if (!pte_present(old_pte) || !pte_young(old_pte))
		return false;

	/* Pages swapped, aged, moved, or cleaned require flushes */
	return !pte_present(hva_pte) ||
	       !pte_young(hva_pte) ||
	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gpa_t gpa = range->start << PAGE_SHIFT;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);

	if (!gpa_pte)
		return false;
	return pte_young(*gpa_pte);
}

/**
 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * calling into KVM. This handles marking old pages young (for idle page
 * tracking), and dirtying of clean pages (for dirty page logging).
 *
 * Returns:	0 on success, in which case we can update derived mappings and
 *		resume guest execution.
 *		-EFAULT on failure due to absent GPA mapping or write to
 *		read-only page, in which case KVM must be consulted.
 */
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
				   bool write_fault,
				   pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	pte_t *ptep;
	kvm_pfn_t pfn = 0;	/* silence bogus GCC warning */
	bool pfn_valid = false;
	int ret = 0;

	spin_lock(&kvm->mmu_lock);

	/* Fast path - just check GPA page table for an existing entry */
	ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	if (!ptep || !pte_present(*ptep)) {
		ret = -EFAULT;
		goto out;
	}

	/* Track access to pages marked old */
	if (!pte_young(*ptep)) {
		set_pte(ptep, pte_mkyoung(*ptep));
		pfn = pte_pfn(*ptep);
		pfn_valid = true;
		/* call kvm_set_pfn_accessed() after unlock */
	}
	if (write_fault && !pte_dirty(*ptep)) {
		if (!pte_write(*ptep)) {
			ret = -EFAULT;
			goto out;
		}

		/* Track dirtying of writeable pages */
		set_pte(ptep, pte_mkdirty(*ptep));
		pfn = pte_pfn(*ptep);
		mark_page_dirty(kvm, gfn);
		kvm_set_pfn_dirty(pfn);
	}

	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

out:
	spin_unlock(&kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
	return ret;
}
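
/*
 * Note on "buddy" PTEs: a MIPS TLB entry maps an even/odd pair of adjacent
 * virtual pages (EntryLo0/EntryLo1), so refilling a TLB entry needs the PTE
 * for the faulting page's neighbour as well; ptep_buddy() returns the other
 * PTE of that pair, which is what the out_buddy parameters above and below
 * report.
 */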

/**
 * kvm_mips_map_page() - Map a guest physical page.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 * caller.
 *
 * Returns:	0 on success, in which case the caller may use the @out_entry
 *		and @out_buddy PTEs to update derived mappings and resume guest
 *		execution.
 *		-EFAULT if there is no memory region at @gpa or a write was
 *		attempted to a read-only memory region. This is usually handled
 *		as an MMIO access.
 */
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
			     bool write_fault,
			     pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int srcu_idx, err;
	kvm_pfn_t pfn;
	pte_t *ptep, entry, old_pte;
	bool writeable;
	unsigned long prot_bits;
	unsigned long mmu_seq;

	/* Try the fast path to handle old / clean pages */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
				      out_buddy);
	if (!err)
		goto out;

	/* We need a minimum of cached pages ready for page table creation */
	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (err)
		goto out;

retry:
	/*
	 * Used to check for invalidations in progress, of the pfn that is
	 * returned by gfn_to_pfn_prot() below.
	 */
	mmu_seq = kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
	 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
	 * risk the page we get a reference to getting unmapped before we have a
	 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
	 *
	 * This smp_rmb() pairs with the effective smp_wmb() of the combination
	 * of the pte_unmap_unlock() after the PTE is zapped, and the
	 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
	 * mmu_notifier_seq is incremented.
	 */
	smp_rmb();

	/* Slow path - ask KVM core whether we can access this GPA */
	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
	if (is_error_noslot_pfn(pfn)) {
		err = -EFAULT;
		goto out;
	}

	spin_lock(&kvm->mmu_lock);
	/* Check if an invalidation has taken place since we got pfn */
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		/*
		 * This can happen when mappings are changed asynchronously, but
		 * also synchronously if a COW is triggered by
		 * gfn_to_pfn_prot().
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	/* Ensure page tables are allocated */
	ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);

	/* Set up the PTE */
	prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
	if (writeable) {
		prot_bits |= _PAGE_WRITE;
		if (write_fault) {
			prot_bits |= __WRITEABLE;
			mark_page_dirty(kvm, gfn);
			kvm_set_pfn_dirty(pfn);
		}
	}
	entry = pfn_pte(pfn, __pgprot(prot_bits));

	/* Write the PTE */
	old_pte = *ptep;
	set_pte(ptep, entry);

	err = 0;
	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	kvm_set_pfn_accessed(pfn);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return err;
}

int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int ret;

	ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
	if (ret)
		return ret;

	/* Invalidate this entry in the TLB */
	return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}

/**
 * kvm_mips_migrate_count() - Migrate timer.
 * @vcpu:	Virtual CPU.
 *
 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 * if it was running prior to being cancelled.
 *
 * Must be called when the VCPU is migrated to a different CPU to ensure that
 * timer expiry during guest execution interrupts the guest and causes the
 * interrupt to be delivered in a timely manner.
 */
static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
{
	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
		hrtimer_restart(&vcpu->arch.comparecount_timer);
}

/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	unsigned long flags;

	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);

	local_irq_save(flags);

	vcpu->cpu = cpu;
	if (vcpu->arch.last_sched_cpu != cpu) {
		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
		/*
		 * Migrate the timer interrupt to the current CPU so that it
		 * always interrupts the guest and synchronously triggers a
		 * guest timer interrupt.
		 */
		kvm_mips_migrate_count(vcpu);
	}

	/* restore guest state to registers */
	kvm_mips_callbacks->vcpu_load(vcpu, cpu);

	local_irq_restore(flags);
}

/* ASID can change if another task is scheduled during preemption */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	vcpu->arch.last_sched_cpu = cpu;
	vcpu->cpu = -1;

	/* save guest state in registers */
	kvm_mips_callbacks->vcpu_put(vcpu, cpu);

	local_irq_restore(flags);
}