linux/arch/x86/include/asm/paravirt.h
<<
>>
Prefs
   1#ifndef _ASM_X86_PARAVIRT_H
   2#define _ASM_X86_PARAVIRT_H
   3/* Various instructions on x86 need to be replaced for
   4 * para-virtualization: those hooks are defined here. */
   5
   6#ifdef CONFIG_PARAVIRT
   7#include <asm/page.h>
   8#include <asm/asm.h>
   9
  10/* Bitmask of what can be clobbered: usually at least eax. */
  11#define CLBR_NONE 0
  12#define CLBR_EAX  (1 << 0)
  13#define CLBR_ECX  (1 << 1)
  14#define CLBR_EDX  (1 << 2)
  15
  16#ifdef CONFIG_X86_64
  17#define CLBR_RSI  (1 << 3)
  18#define CLBR_RDI  (1 << 4)
  19#define CLBR_R8   (1 << 5)
  20#define CLBR_R9   (1 << 6)
  21#define CLBR_R10  (1 << 7)
  22#define CLBR_R11  (1 << 8)
  23#define CLBR_ANY  ((1 << 9) - 1)
  24#include <asm/desc_defs.h>
  25#else
  26/* CLBR_ANY should match all regs the platform has: on i386, eax/ecx/edx. */
  27#define CLBR_ANY  ((1 << 3) - 1)
  28#endif /* X86_64 */
  29
  30#ifndef __ASSEMBLY__
  31#include <linux/types.h>
  32#include <linux/cpumask.h>
  33#include <asm/kmap_types.h>
  34#include <asm/desc_defs.h>
  35
  36struct page;
  37struct thread_struct;
  38struct desc_ptr;
  39struct tss_struct;
  40struct mm_struct;
  41struct desc_struct;
  42
  43/* general info */
  44struct pv_info {
  45        unsigned int kernel_rpl;        /* read via get_kernel_rpl() */
  46        int shared_kernel_pmd;
  47        int paravirt_enabled;           /* returned by paravirt_enabled() */
  48        const char *name;
  49};
  50
  51struct pv_init_ops {
  52        /*
  53         * Patch may replace one of the defined code sequences with
  54         * arbitrary code, subject to the same register constraints.
  55         * This generally means the code is not free to clobber any
  56         * registers other than EAX.  The patch function should return
  57         * the number of bytes of code generated, as we nop pad the
  58         * rest in generic code.
  59         */
  60        unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
  61                          unsigned long addr, unsigned len);
  62
  63        /* Basic arch-specific setup */
  64        void (*arch_setup)(void);
  65        char *(*memory_setup)(void);
  66        void (*post_allocator_init)(void);
  67
  68        /* Print a banner to identify the environment */
  69        void (*banner)(void);
  70};
  71
  72
  73struct pv_lazy_ops {
  74        /* Set deferred update mode, used for batching operations. */
  75        void (*enter)(void);
  76        void (*leave)(void);
  77};
  78
  79struct pv_time_ops {
  80        void (*time_init)(void);        /* see choose_time_init() */
  81
  82        /* Get and set time of day */
  83        unsigned long (*get_wallclock)(void);
  84        int (*set_wallclock)(unsigned long);
  85
  86        unsigned long long (*sched_clock)(void);
  87        unsigned long (*get_tsc_khz)(void);     /* used by calibrate_tsc() */
  88};
  89
  90struct pv_cpu_ops {
  91        /* hooks for various privileged instructions */
  92        unsigned long (*get_debugreg)(int regno);
  93        void (*set_debugreg)(int regno, unsigned long value);
  94
  95        void (*clts)(void);
  96
  97        unsigned long (*read_cr0)(void);
  98        void (*write_cr0)(unsigned long);
  99
 100        unsigned long (*read_cr4_safe)(void);
 101        unsigned long (*read_cr4)(void);
 102        void (*write_cr4)(unsigned long);
 103
 104#ifdef CONFIG_X86_64
 105        unsigned long (*read_cr8)(void);
 106        void (*write_cr8)(unsigned long);
 107#endif
 108
 109        /* Segment descriptor handling */
 110        void (*load_tr_desc)(void);
 111        void (*load_gdt)(const struct desc_ptr *);
 112        void (*load_idt)(const struct desc_ptr *);
 113        void (*store_gdt)(struct desc_ptr *);
 114        void (*store_idt)(struct desc_ptr *);
 115        void (*set_ldt)(const void *desc, unsigned entries);
 116        unsigned long (*store_tr)(void);
 117        void (*load_tls)(struct thread_struct *t, unsigned int cpu);
 118#ifdef CONFIG_X86_64
 119        void (*load_gs_index)(unsigned int idx);
 120#endif
 121        void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
 122                                const void *desc);
 123        void (*write_gdt_entry)(struct desc_struct *,
 124                                int entrynum, const void *desc, int size);
 125        void (*write_idt_entry)(gate_desc *,
 126                                int entrynum, const gate_desc *gate);
 127        void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
 128        void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
 129
 130        void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 131
 132        void (*set_iopl_mask)(unsigned mask);
 133
 134        void (*wbinvd)(void);
 135        void (*io_delay)(void);
 136
 137        /* cpuid emulation, mostly so that caps bits can be disabled */
 138        void (*cpuid)(unsigned int *eax, unsigned int *ebx,
 139                      unsigned int *ecx, unsigned int *edx);
 140
 141        /* MSR, PMC and TSC operations.
 142           err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
 143        u64 (*read_msr_amd)(unsigned int msr, int *err);
 144        u64 (*read_msr)(unsigned int msr, int *err);
 145        int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
 146
 147        u64 (*read_tsc)(void);
 148        u64 (*read_pmc)(int counter);
 149        unsigned long long (*read_tscp)(unsigned int *aux);
 150
 151        /*
 152         * Atomically enable interrupts and return to userspace.  This
 153         * is only ever used to return to 32-bit processes; in a
 154         * 64-bit kernel, it's used for 32-on-64 compat processes, but
 155         * never native 64-bit processes.  (Jump, not call.)
 156         */
 157        void (*irq_enable_sysexit)(void);
 158
 159        /*
 160         * Switch to usermode gs and return to 64-bit usermode using
 161         * sysret.  Only used in 64-bit kernels to return to 64-bit
 162         * processes.  Usermode register state, including %rsp, must
 163         * already be restored.
 164         */
 165        void (*usergs_sysret64)(void);
 166
 167        /*
 168         * Switch to usermode gs and return to 32-bit usermode using
 169         * sysret.  Used to return to 32-on-64 compat processes.
 170         * Other usermode register state, including %esp, must already
 171         * be restored.
 172         */
 173        void (*usergs_sysret32)(void);
 174
 175        /* Normal iret.  Jump to this with the standard iret stack
 176           frame set up. */
 177        void (*iret)(void);
 178
 179        void (*swapgs)(void);
 180
 181        struct pv_lazy_ops lazy_mode;
 182};
 183
 184struct pv_irq_ops {
 185        void (*init_IRQ)(void);
 186
 187        /*
 188         * Get/set interrupt state.  save_fl and restore_fl are only
 189         * expected to use X86_EFLAGS_IF; all other bits
 190         * returned from save_fl are undefined, and may be ignored by
 191         * restore_fl.
 192         */
 193        unsigned long (*save_fl)(void);
 194        void (*restore_fl)(unsigned long);
 195        void (*irq_disable)(void);
 196        void (*irq_enable)(void);
 197        void (*safe_halt)(void);
 198        void (*halt)(void);
 199
 200#ifdef CONFIG_X86_64
 201        void (*adjust_exception_frame)(void);
 202#endif
 203};
 204
 205struct pv_apic_ops {
 206#ifdef CONFIG_X86_LOCAL_APIC
 207        void (*setup_boot_clock)(void);
 208        void (*setup_secondary_clock)(void);
 209
 210        void (*startup_ipi_hook)(int phys_apicid,
 211                                 unsigned long start_eip,
 212                                 unsigned long start_esp);
 213#endif
 214};
 215
 216struct pv_mmu_ops {
 217        /*
 218         * Called before/after init_mm pagetable setup. setup_start
 219         * may reset %cr3, and may pre-install parts of the pagetable;
 220         * pagetable setup is expected to preserve any existing
 221         * mapping.
 222         */
 223        void (*pagetable_setup_start)(pgd_t *pgd_base);
 224        void (*pagetable_setup_done)(pgd_t *pgd_base);
 225
 226        unsigned long (*read_cr2)(void);
 227        void (*write_cr2)(unsigned long);
 228
 229        unsigned long (*read_cr3)(void);
 230        void (*write_cr3)(unsigned long);
 231
 232        /*
 233         * Hooks for intercepting the creation/use/destruction of an
 234         * mm_struct.
 235         */
 236        void (*activate_mm)(struct mm_struct *prev,
 237                            struct mm_struct *next);
 238        void (*dup_mmap)(struct mm_struct *oldmm,
 239                         struct mm_struct *mm);
 240        void (*exit_mmap)(struct mm_struct *mm);
 241
 242
 243        /* TLB operations */
 244        void (*flush_tlb_user)(void);
 245        void (*flush_tlb_kernel)(void);
 246        void (*flush_tlb_single)(unsigned long addr);
 247        void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
 248                                 unsigned long va);
 249
 250        /* Hooks for allocating and freeing a pagetable top-level */
 251        int  (*pgd_alloc)(struct mm_struct *mm);
 252        void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
 253
 254        /*
 255         * Hooks for allocating/releasing pagetable pages when they're
 256         * attached to a pagetable
 257         */
 258        void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
 259        void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
 260        void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
 261        void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
 262        void (*release_pte)(unsigned long pfn);
 263        void (*release_pmd)(unsigned long pfn);
 264        void (*release_pud)(unsigned long pfn);
 265
 266        /* Pagetable manipulation functions */
 267        void (*set_pte)(pte_t *ptep, pte_t pteval);
 268        void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
 269                           pte_t *ptep, pte_t pteval);
 270        void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
 271        void (*pte_update)(struct mm_struct *mm, unsigned long addr,
 272                           pte_t *ptep);
 273        void (*pte_update_defer)(struct mm_struct *mm,
 274                                 unsigned long addr, pte_t *ptep);
 275
 276        pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
 277                                        pte_t *ptep);
 278        void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
 279                                        pte_t *ptep, pte_t pte);
 280
 281        pteval_t (*pte_val)(pte_t);
 282        pteval_t (*pte_flags)(pte_t);
 283        pte_t (*make_pte)(pteval_t pte);
 284
 285        pgdval_t (*pgd_val)(pgd_t);
 286        pgd_t (*make_pgd)(pgdval_t pgd);
 287
 288#if PAGETABLE_LEVELS >= 3
 289#ifdef CONFIG_X86_PAE
 290        void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
 291        void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
 292                                pte_t *ptep, pte_t pte);
 293        void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
 294                          pte_t *ptep);
 295        void (*pmd_clear)(pmd_t *pmdp);
 296
 297#endif  /* CONFIG_X86_PAE */
 298
 299        void (*set_pud)(pud_t *pudp, pud_t pudval);
 300
 301        pmdval_t (*pmd_val)(pmd_t);
 302        pmd_t (*make_pmd)(pmdval_t pmd);
 303
 304#if PAGETABLE_LEVELS == 4
 305        pudval_t (*pud_val)(pud_t);
 306        pud_t (*make_pud)(pudval_t pud);
 307
 308        void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
 309#endif  /* PAGETABLE_LEVELS == 4 */
 310#endif  /* PAGETABLE_LEVELS >= 3 */
 311
 312#ifdef CONFIG_HIGHPTE
 313        void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
 314#endif
 315
 316        struct pv_lazy_ops lazy_mode;
 317
 318        /* dom0 ops */
 319
 320        /* Sometimes the physical address is a pfn, and sometimes it's
 321           an mfn.  We can tell which is which from the index. */
 322        void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
 323                           unsigned long phys, pgprot_t flags);
 324};
 325
 326struct raw_spinlock;
 327struct pv_lock_ops {
 328        int (*spin_is_locked)(struct raw_spinlock *lock);
 329        int (*spin_is_contended)(struct raw_spinlock *lock);
 330        void (*spin_lock)(struct raw_spinlock *lock);
 331        void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
 332        int (*spin_trylock)(struct raw_spinlock *lock);
 333        void (*spin_unlock)(struct raw_spinlock *lock);
 334};
 335
 336/* This contains all the paravirt structures: we get a convenient
 337 * number for each function using the offset which we use to indicate
 338 * what to patch. */
 339struct paravirt_patch_template {
 340        struct pv_init_ops pv_init_ops;
 341        struct pv_time_ops pv_time_ops;
 342        struct pv_cpu_ops pv_cpu_ops;
 343        struct pv_irq_ops pv_irq_ops;
 344        struct pv_apic_ops pv_apic_ops;
 345        struct pv_mmu_ops pv_mmu_ops;
 346        struct pv_lock_ops pv_lock_ops;
 347};
 348
 349extern struct pv_info pv_info;
 350extern struct pv_init_ops pv_init_ops;
 351extern struct pv_time_ops pv_time_ops;
 352extern struct pv_cpu_ops pv_cpu_ops;
 353extern struct pv_irq_ops pv_irq_ops;
 354extern struct pv_apic_ops pv_apic_ops;
 355extern struct pv_mmu_ops pv_mmu_ops;
 356extern struct pv_lock_ops pv_lock_ops;
 357
 358#define PARAVIRT_PATCH(x)                                       \
 359        (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
 360
 361#define paravirt_type(op)                               \
 362        [paravirt_typenum] "i" (PARAVIRT_PATCH(op)),    \
 363        [paravirt_opptr] "m" (op)
 364#define paravirt_clobber(clobber)               \
 365        [paravirt_clobber] "i" (clobber)
 366
 367/*
 368 * Generate some code, and mark it as patchable by the
 369 * apply_paravirt() alternate instruction patcher.
 370 */
 371#define _paravirt_alt(insn_string, type, clobber)       \
 372        "771:\n\t" insn_string "\n" "772:\n"            \
 373        ".pushsection .parainstructions,\"a\"\n"        \
 374        _ASM_ALIGN "\n"                                 \
 375        _ASM_PTR " 771b\n"                              \
 376        "  .byte " type "\n"                            \
 377        "  .byte 772b-771b\n"                           \
 378        "  .short " clobber "\n"                        \
 379        ".popsection\n"
 380
 381/* Generate patchable code, with the default asm parameters. */
 382#define paravirt_alt(insn_string)                                       \
 383        _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 384
 385/* Simple instruction patching code. */
 386#define DEF_NATIVE(ops, name, code)                                     \
 387        extern const char start_##ops##_##name[], end_##ops##_##name[]; \
 388        asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
 389
 390unsigned paravirt_patch_nop(void);
 391unsigned paravirt_patch_ignore(unsigned len);
 392unsigned paravirt_patch_call(void *insnbuf,
 393                             const void *target, u16 tgt_clobbers,
 394                             unsigned long addr, u16 site_clobbers,
 395                             unsigned len);
 396unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 397                            unsigned long addr, unsigned len);
 398unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 399                                unsigned long addr, unsigned len);
 400
 401unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 402                              const char *start, const char *end);
 403
 404unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 405                      unsigned long addr, unsigned len);
 406
 407int paravirt_disable_iospace(void);
 408
 409/*
 410 * This generates an indirect call based on the operation type number.
 411 * The type number, computed in PARAVIRT_PATCH, is derived from the
 412 * offset into the paravirt_patch_template structure, and can therefore be
 413 * freely converted back into a structure offset.
 414 */
 415#define PARAVIRT_CALL   "call *%[paravirt_opptr];"
 416
 417/*
 418 * These macros are intended to wrap calls through one of the paravirt
 419 * ops structs, so that they can be later identified and patched at
 420 * runtime.
 421 *
 422 * Normally, a call to a pv_op function is a simple indirect call:
 423 * (pv_op_struct.operations)(args...).
 424 *
 425 * Unfortunately, this is a relatively slow operation for modern CPUs,
 426 * because it cannot necessarily determine what the destination
 427 * address is.  In this case, the address is a runtime constant, so at
 428 * the very least we can patch the call to be a simple direct call, or
 429 * ideally, patch an inline implementation into the callsite.  (Direct
 430 * calls are essentially free, because the call and return addresses
 431 * are completely predictable.)
 432 *
 433 * For i386, these macros rely on the standard gcc "regparm(3)" calling
 434 * convention, in which the first three arguments are placed in %eax,
 435 * %edx, %ecx (in that order), and the remaining arguments are placed
 436 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 437 * to be modified (either clobbered or used for return values).
 438 * X86_64, on the other hand, already specifies a register-based calling
 439 * convention, returning in %rax, with parameters going in %rdi, %rsi,
 440 * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
 441 * special handling for dealing with 4 arguments, unlike i386.
 442 * However, x86_64 also has to clobber all caller-saved registers, of
 443 * which, unfortunately, there are quite a few (r8 - r11).
 444 *
 445 * The call instruction itself is marked by placing its start address
 446 * and size into the .parainstructions section, so that
 447 * apply_paravirt() in arch/x86/kernel/alternative.c can do the
 448 * appropriate patching under the control of the backend pv_init_ops
 449 * implementation.
 450 *
 451 * Unfortunately there's no way to get gcc to generate the args setup
 452 * for the call, and then allow the call itself to be generated by an
 453 * inline asm.  Because of this, we must do the complete arg setup and
 454 * return value handling from within these macros.  This is fairly
 455 * cumbersome.
 456 *
 457 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
 458 * It could be extended to more arguments, but there would be little
 459 * to be gained from that.  For each number of arguments, there are
 460 * the two VCALL and CALL variants for void and non-void functions.
 461 *
 462 * When there is a return value, the invoker of the macro must specify
 463 * the return type.  The macro then uses sizeof() on that type to
 464 * determine whether its a 32 or 64 bit value, and places the return
 465 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 466 * 64-bit). For x86_64 machines, it just returns at %rax regardless of
 467 * the return value size.
 468 *
 469 * On i386, 64-bit arguments are passed
 470 * as a pair of adjacent 32-bit arguments,
 471 * in low,high order.
 472 *
 473 * Small structures are passed and returned in registers.  The macro
 474 * calling convention can't directly deal with this, so the wrapper
 475 * functions must do this.
 476 *
 477 * These PVOP_* macros are only defined within this header.  This
 478 * means that all uses must be wrapped in inline functions.  This also
 479 * makes sure the incoming and outgoing types are always correct.
 480 */
 481#ifdef CONFIG_X86_32
 482#define PVOP_VCALL_ARGS                 unsigned long __eax, __edx, __ecx
 483#define PVOP_CALL_ARGS                  PVOP_VCALL_ARGS
 484#define PVOP_VCALL_CLOBBERS             "=a" (__eax), "=d" (__edx),     \
 485                                        "=c" (__ecx)
 486#define PVOP_CALL_CLOBBERS              PVOP_VCALL_CLOBBERS
 487#define EXTRA_CLOBBERS
 488#define VEXTRA_CLOBBERS
 489#else
 490#define PVOP_VCALL_ARGS         unsigned long __edi, __esi, __edx, __ecx
 491#define PVOP_CALL_ARGS          PVOP_VCALL_ARGS, __eax
 492#define PVOP_VCALL_CLOBBERS     "=D" (__edi),                           \
 493                                "=S" (__esi), "=d" (__edx),             \
 494                                "=c" (__ecx)
 495
 496#define PVOP_CALL_CLOBBERS      PVOP_VCALL_CLOBBERS, "=a" (__eax)
 497
 498#define EXTRA_CLOBBERS   , "r8", "r9", "r10", "r11"
 499#define VEXTRA_CLOBBERS  , "rax", "r8", "r9", "r10", "r11"
 500#endif
 501
 502#ifdef CONFIG_PARAVIRT_DEBUG     /* debug: BUG on a NULL pv op pointer */
 503#define PVOP_TEST_NULL(op)      BUG_ON((op) == NULL)
 504#else                            /* non-debug: evaluate op, check nothing */
 505#define PVOP_TEST_NULL(op)      ((void)(op))
 506#endif
 507
 508#define __PVOP_CALL(rettype, op, pre, post, ...)                        \
 509        ({                                                              \
 510                rettype __ret;                                          \
 511                PVOP_CALL_ARGS;                                 \
 512                PVOP_TEST_NULL(op);                                     \
 513                /* This is 32-bit specific, but is okay in 64-bit */    \
 514                /* since this condition will never hold */              \
 515                if (sizeof(rettype) > sizeof(unsigned long)) {          \
 516                        asm volatile(pre                                \
 517                                     paravirt_alt(PARAVIRT_CALL)        \
 518                                     post                               \
 519                                     : PVOP_CALL_CLOBBERS               \
 520                                     : paravirt_type(op),               \
 521                                       paravirt_clobber(CLBR_ANY),      \
 522                                       ##__VA_ARGS__                    \
 523                                     : "memory", "cc" EXTRA_CLOBBERS);  \
 524                        __ret = (rettype)((((u64)__edx) << 32) | __eax); \
 525                } else {                                                \
 526                        asm volatile(pre                                \
 527                                     paravirt_alt(PARAVIRT_CALL)        \
 528                                     post                               \
 529                                     : PVOP_CALL_CLOBBERS               \
 530                                     : paravirt_type(op),               \
 531                                       paravirt_clobber(CLBR_ANY),      \
 532                                       ##__VA_ARGS__                    \
 533                                     : "memory", "cc" EXTRA_CLOBBERS);  \
 534                        __ret = (rettype)__eax;                         \
 535                }                                                       \
 536                __ret;                                                  \
 537        })
 538#define __PVOP_VCALL(op, pre, post, ...)                                \
 539        ({                                                              \
 540                PVOP_VCALL_ARGS;                                        \
 541                PVOP_TEST_NULL(op);                                     \
 542                asm volatile(pre                                        \
 543                             paravirt_alt(PARAVIRT_CALL)                \
 544                             post                                       \
 545                             : PVOP_VCALL_CLOBBERS                      \
 546                             : paravirt_type(op),                       \
 547                               paravirt_clobber(CLBR_ANY),              \
 548                               ##__VA_ARGS__                            \
 549                             : "memory", "cc" VEXTRA_CLOBBERS);         \
 550        })
 551
 552#define PVOP_CALL0(rettype, op)                                         \
 553        __PVOP_CALL(rettype, op, "", "")
 554#define PVOP_VCALL0(op)                                                 \
 555        __PVOP_VCALL(op, "", "")
 556
 557#define PVOP_CALL1(rettype, op, arg1)                                   \
 558        __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
 559#define PVOP_VCALL1(op, arg1)                                           \
 560        __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
 561
 562#define PVOP_CALL2(rettype, op, arg1, arg2)                             \
 563        __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
 564        "1" ((unsigned long)(arg2)))
 565#define PVOP_VCALL2(op, arg1, arg2)                                     \
 566        __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
 567        "1" ((unsigned long)(arg2)))
 568
 569#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)                       \
 570        __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
 571        "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
 572#define PVOP_VCALL3(op, arg1, arg2, arg3)                               \
 573        __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
 574        "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
 575
 576/* This is the only difference in x86_64. We can make it much simpler */
 577#ifdef CONFIG_X86_32
 578#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                 \
 579        __PVOP_CALL(rettype, op,                                        \
 580                    "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
 581                    "0" ((u32)(arg1)), "1" ((u32)(arg2)),               \
 582                    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
 583#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                         \
 584        __PVOP_VCALL(op,                                                \
 585                    "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
 586                    "0" ((u32)(arg1)), "1" ((u32)(arg2)),               \
 587                    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
 588#else
 589#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                 \
 590        __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
 591        "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),         \
 592        "3"((unsigned long)(arg4)))
 593#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                         \
 594        __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
 595        "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),         \
 596        "3"((unsigned long)(arg4)))
 597#endif
 598
 599static inline int paravirt_enabled(void)
 600{
 601        return pv_info.paravirt_enabled;
 602}
 603
 604static inline void load_sp0(struct tss_struct *tss,
 605                             struct thread_struct *thread)
 606{
 607        PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
 608}
 609
 610#define ARCH_SETUP                      pv_init_ops.arch_setup();
 611static inline unsigned long get_wallclock(void)
 612{
 613        return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
 614}
 615
 616static inline int set_wallclock(unsigned long nowtime)
 617{
 618        return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
 619}
 620
 621static inline void (*choose_time_init(void))(void)
 622{
 623        return pv_time_ops.time_init;
 624}
 625
 626/* The paravirtualized CPUID instruction. */
 627static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
 628                           unsigned int *ecx, unsigned int *edx)
 629{
 630        PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
 631}
 632
 633/*
 634 * Accessors for the debug registers, routed through pv_cpu_ops.  Note that
 635 * set_debugreg() takes (val, reg) but the hook is called as (reg, val). */
 636static inline unsigned long paravirt_get_debugreg(int reg)
 637{
 638        return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
 639}
 640#define get_debugreg(var, reg) ((var) = paravirt_get_debugreg(reg))
 641static inline void set_debugreg(unsigned long val, int reg)
 642{
 643        PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
 644}
 645
 646static inline void clts(void)
 647{
 648        PVOP_VCALL0(pv_cpu_ops.clts);
 649}
 650
 651static inline unsigned long read_cr0(void)
 652{
 653        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
 654}
 655
 656static inline void write_cr0(unsigned long x)
 657{
 658        PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
 659}
 660
 661static inline unsigned long read_cr2(void)
 662{
 663        return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
 664}
 665
 666static inline void write_cr2(unsigned long x)
 667{
 668        PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
 669}
 670
 671static inline unsigned long read_cr3(void)
 672{
 673        return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
 674}
 675
 676static inline void write_cr3(unsigned long x)
 677{
 678        PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 679}
 680
 681static inline unsigned long read_cr4(void)
 682{
 683        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 684}
 685static inline unsigned long read_cr4_safe(void)
 686{
 687        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 688}
 689
 690static inline void write_cr4(unsigned long x)
 691{
 692        PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 693}
 694
 695#ifdef CONFIG_X86_64
 696static inline unsigned long read_cr8(void)
 697{
 698        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
 699}
 700
 701static inline void write_cr8(unsigned long x)
 702{
 703        PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
 704}
 705#endif
 706
 707static inline void raw_safe_halt(void)
 708{
 709        PVOP_VCALL0(pv_irq_ops.safe_halt);
 710}
 711
 712static inline void halt(void)
 713{
 714        PVOP_VCALL0(pv_irq_ops.halt);   /* the halt hook, not safe_halt */
 715}
 716
 717static inline void wbinvd(void)
 718{
 719        PVOP_VCALL0(pv_cpu_ops.wbinvd);
 720}
 721
 722#define get_kernel_rpl()  (pv_info.kernel_rpl)
 723
 724static inline u64 paravirt_read_msr(unsigned msr, int *err)
 725{
 726        return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 727}
 728static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
 729{
 730        return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
 731}
 732static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 733{
 734        return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
 735}
 736
 737/* These should all do BUG_ON(_err), but our headers are too tangled. */
 738#define rdmsr(msr, val1, val2)                  \
 739do {                                            \
 740        int _err;                               \
 741        u64 _l = paravirt_read_msr(msr, &_err); \
 742        val1 = (u32)_l;                         \
 743        val2 = _l >> 32;                        \
 744} while (0)
 745
 746#define wrmsr(msr, val1, val2)                  \
 747do {                                            \
 748        paravirt_write_msr(msr, val1, val2);    \
 749} while (0)
 750
 751#define rdmsrl(msr, val)                        \
 752do {                                            \
 753        int _err;                               \
 754        val = paravirt_read_msr(msr, &_err);    \
 755} while (0)
 756
 757#define wrmsrl(msr, val)        wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
 758#define wrmsr_safe(msr, a, b)   paravirt_write_msr(msr, a, b)
 759
 760/* rdmsr with exception handling */
 761#define rdmsr_safe(msr, a, b)                   \
 762({                                              \
 763        int _err;                               \
 764        u64 _l = paravirt_read_msr(msr, &_err); \
 765        (*a) = (u32)_l;                         \
 766        (*b) = _l >> 32;                        \
 767        _err;                                   \
 768})
 769
 770static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 771{
 772        int err;
 773
 774        *p = paravirt_read_msr(msr, &err);
 775        return err;
 776}
 777static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 778{
 779        int err;
 780
 781        *p = paravirt_read_msr_amd(msr, &err);
 782        return err;
 783}
 784
 785static inline u64 paravirt_read_tsc(void)
 786{
 787        return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
 788}
 789
 790#define rdtscl(low)                             \
 791do {                                            \
 792        u64 _l = paravirt_read_tsc();           \
 793        low = (int)_l;                          \
 794} while (0)
 795
 796#define rdtscll(val) (val = paravirt_read_tsc())
 797
 798static inline unsigned long long paravirt_sched_clock(void)
 799{
 800        return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 801}
 802#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
 803
 804static inline unsigned long long paravirt_read_pmc(int counter)
 805{
 806        return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
 807}
 808
 809#define rdpmc(counter, low, high)               \
 810do {                                            \
 811        u64 _l = paravirt_read_pmc(counter);    \
 812        low = (u32)_l;                          \
 813        high = _l >> 32;                        \
 814} while (0)
 815
 816static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 817{
 818        return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
 819}
 820
 821#define rdtscp(low, high, aux)                          \
 822do {                                                    \
 823        int __aux;                                      \
 824        unsigned long __val = paravirt_rdtscp(&__aux);  \
 825        (low) = (u32)__val;                             \
 826        (high) = (u32)(__val >> 32);                    \
 827        (aux) = __aux;                                  \
 828} while (0)
 829
 830#define rdtscpll(val, aux)                              \
 831do {                                                    \
 832        unsigned long __aux;                            \
 833        val = paravirt_rdtscp(&__aux);                  \
 834        (aux) = __aux;                                  \
 835} while (0)
 836
 837static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 838{
 839        PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
 840}
 841
 842static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
 843{
 844        PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
 845}
 846
 847static inline void load_TR_desc(void)
 848{
 849        PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
 850}
 851static inline void load_gdt(const struct desc_ptr *dtr)
 852{
 853        PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
 854}
 855static inline void load_idt(const struct desc_ptr *dtr)
 856{
 857        PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
 858}
 859static inline void set_ldt(const void *addr, unsigned entries)
 860{
 861        PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 862}
 863static inline void store_gdt(struct desc_ptr *dtr)
 864{
 865        PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
 866}
 867static inline void store_idt(struct desc_ptr *dtr)
 868{
 869        PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
 870}
 871static inline unsigned long paravirt_store_tr(void)
 872{
 873        return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
 874}
 875#define store_tr(tr)    ((tr) = paravirt_store_tr())
 876static inline void load_TLS(struct thread_struct *t, unsigned cpu)
 877{
 878        PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
 879}
 880
 881#ifdef CONFIG_X86_64
 882static inline void load_gs_index(unsigned int gs)
 883{
 884        PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
 885}
 886#endif
 887
 888static inline void write_ldt_entry(struct desc_struct *dt, int entry,
 889                                   const void *desc)
 890{
 891        PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
 892}
 893
 894static inline void write_gdt_entry(struct desc_struct *dt, int entry,
 895                                   void *desc, int type)
 896{
 897        PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
 898}
 899
 900static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 901{
 902        PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
 903}
 904static inline void set_iopl_mask(unsigned mask)
 905{
 906        PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
 907}
 908
 909/* The paravirtualized I/O functions */
 910static inline void slow_down_io(void)
 911{
 912        pv_cpu_ops.io_delay();
 913#ifdef REALLY_SLOW_IO
 914        pv_cpu_ops.io_delay();
 915        pv_cpu_ops.io_delay();
 916        pv_cpu_ops.io_delay();
 917#endif
 918}
 919
 920#ifdef CONFIG_X86_LOCAL_APIC
 921static inline void setup_boot_clock(void)
 922{
 923        PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
 924}
 925
 926static inline void setup_secondary_clock(void)
 927{
 928        PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
 929}
 930#endif
 931
 932static inline void paravirt_post_allocator_init(void)
 933{
 934        if (pv_init_ops.post_allocator_init)
 935                (*pv_init_ops.post_allocator_init)();
 936}
 937
 938static inline void paravirt_pagetable_setup_start(pgd_t *base)
 939{
 940        (*pv_mmu_ops.pagetable_setup_start)(base);
 941}
 942
 943static inline void paravirt_pagetable_setup_done(pgd_t *base)
 944{
 945        (*pv_mmu_ops.pagetable_setup_done)(base);
 946}
 947
 948#ifdef CONFIG_SMP
 949static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 950                                    unsigned long start_esp)
 951{
 952        PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
 953                    phys_apicid, start_eip, start_esp);
 954}
 955#endif
 956
 957static inline void paravirt_activate_mm(struct mm_struct *prev,
 958                                        struct mm_struct *next)
 959{
 960        PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
 961}
 962
 963static inline void arch_dup_mmap(struct mm_struct *oldmm,
 964                                 struct mm_struct *mm)
 965{
 966        PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
 967}
 968
 969static inline void arch_exit_mmap(struct mm_struct *mm)
 970{
 971        PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
 972}
 973
 974static inline void __flush_tlb(void)
 975{
 976        PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
 977}
 978static inline void __flush_tlb_global(void)
 979{
 980        PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 981}
 982static inline void __flush_tlb_single(unsigned long addr)
 983{
 984        PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
 985}
 986
 987static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 988                                    unsigned long va)
 989{
 990        PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
 991}
 992
 993static inline int paravirt_pgd_alloc(struct mm_struct *mm)
 994{
 995        return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
 996}
 997
 998static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 999{
1000        PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
1001}
1002
1003static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1004{
1005        PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
1006}
1007static inline void paravirt_release_pte(unsigned long pfn)
1008{
1009        PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
1010}
1011
1012static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1013{
1014        PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
1015}
1016
1017static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
1018                                            unsigned long start, unsigned long count)
1019{
1020        PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
1021}
1022static inline void paravirt_release_pmd(unsigned long pfn)
1023{
1024        PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
1025}
1026
1027static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1028{
1029        PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
1030}
1031static inline void paravirt_release_pud(unsigned long pfn)
1032{
1033        PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
1034}
1035
1036#ifdef CONFIG_HIGHPTE
1037static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
1038{
1039        unsigned long ret;
1040        ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
1041        return (void *)ret;
1042}
1043#endif
1044
1045static inline void pte_update(struct mm_struct *mm, unsigned long addr,
1046                              pte_t *ptep)
1047{
1048        PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
1049}
1050
1051static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
1052                                    pte_t *ptep)
1053{
1054        PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
1055}
1056
1057static inline pte_t __pte(pteval_t val)
1058{
1059        pteval_t ret;
1060
1061        if (sizeof(pteval_t) > sizeof(long))
1062                ret = PVOP_CALL2(pteval_t,
1063                                 pv_mmu_ops.make_pte,
1064                                 val, (u64)val >> 32);
1065        else
1066                ret = PVOP_CALL1(pteval_t,
1067                                 pv_mmu_ops.make_pte,
1068                                 val);
1069
1070        return (pte_t) { .pte = ret };
1071}
1072
1073static inline pteval_t pte_val(pte_t pte)
1074{
1075        pteval_t ret;
1076
1077        if (sizeof(pteval_t) > sizeof(long))
1078                ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
1079                                 pte.pte, (u64)pte.pte >> 32);
1080        else
1081                ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
1082                                 pte.pte);
1083
1084        return ret;
1085}
1086
1087static inline pteval_t pte_flags(pte_t pte)
1088{
1089        pteval_t ret;
1090
1091        if (sizeof(pteval_t) > sizeof(long))
1092                ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
1093                                 pte.pte, (u64)pte.pte >> 32);
1094        else
1095                ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
1096                                 pte.pte);
1097
1098#ifdef CONFIG_PARAVIRT_DEBUG
1099        BUG_ON(ret & PTE_PFN_MASK);
1100#endif
1101        return ret;
1102}
1103
1104static inline pgd_t __pgd(pgdval_t val)
1105{
1106        pgdval_t ret;
1107
1108        if (sizeof(pgdval_t) > sizeof(long))
1109                ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
1110                                 val, (u64)val >> 32);
1111        else
1112                ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
1113                                 val);
1114
1115        return (pgd_t) { ret };
1116}
1117
1118static inline pgdval_t pgd_val(pgd_t pgd)
1119{
1120        pgdval_t ret;
1121
1122        if (sizeof(pgdval_t) > sizeof(long))
1123                ret =  PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
1124                                  pgd.pgd, (u64)pgd.pgd >> 32);
1125        else
1126                ret =  PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
1127                                  pgd.pgd);
1128
1129        return ret;
1130}
1131
1132#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1133static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
1134                                           pte_t *ptep)
1135{
1136        pteval_t ret;
1137
1138        ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
1139                         mm, addr, ptep);
1140
1141        return (pte_t) { .pte = ret };
1142}
1143
1144static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
1145                                           pte_t *ptep, pte_t pte)
1146{
1147        if (sizeof(pteval_t) > sizeof(long))
1148                /* 5 arg words */
1149                pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
1150        else
1151                PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
1152                            mm, addr, ptep, pte.pte);
1153}
1154
1155static inline void set_pte(pte_t *ptep, pte_t pte)
1156{
1157        if (sizeof(pteval_t) > sizeof(long))
1158                PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
1159                            pte.pte, (u64)pte.pte >> 32);
1160        else
1161                PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
1162                            pte.pte);
1163}
1164
1165static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
1166                              pte_t *ptep, pte_t pte)
1167{
1168        if (sizeof(pteval_t) > sizeof(long))
1169                /* 5 arg words */
1170                pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
1171        else
1172                PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
1173}
1174
1175static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
1176{
1177        pmdval_t val = native_pmd_val(pmd);
1178
1179        if (sizeof(pmdval_t) > sizeof(long))
1180                PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
1181        else
1182                PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
1183}
1184
1185#if PAGETABLE_LEVELS >= 3
1186static inline pmd_t __pmd(pmdval_t val)
1187{
1188        pmdval_t ret;
1189
1190        if (sizeof(pmdval_t) > sizeof(long))
1191                ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
1192                                 val, (u64)val >> 32);
1193        else
1194                ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
1195                                 val);
1196
1197        return (pmd_t) { ret };
1198}
1199
1200static inline pmdval_t pmd_val(pmd_t pmd)
1201{
1202        pmdval_t ret;
1203
1204        if (sizeof(pmdval_t) > sizeof(long))
1205                ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
1206                                  pmd.pmd, (u64)pmd.pmd >> 32);
1207        else
1208                ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
1209                                  pmd.pmd);
1210
1211        return ret;
1212}
1213
1214static inline void set_pud(pud_t *pudp, pud_t pud)
1215{
1216        pudval_t val = native_pud_val(pud);
1217
1218        if (sizeof(pudval_t) > sizeof(long))
1219                PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
1220                            val, (u64)val >> 32);
1221        else
1222                PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
1223                            val);
1224}
1225#if PAGETABLE_LEVELS == 4
1226static inline pud_t __pud(pudval_t val)
1227{
1228        pudval_t ret;
1229
1230        if (sizeof(pudval_t) > sizeof(long))
1231                ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
1232                                 val, (u64)val >> 32);
1233        else
1234                ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
1235                                 val);
1236
1237        return (pud_t) { ret };
1238}
1239
1240static inline pudval_t pud_val(pud_t pud)
1241{
1242        pudval_t ret;
1243
1244        if (sizeof(pudval_t) > sizeof(long))
1245                ret =  PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
1246                                  pud.pud, (u64)pud.pud >> 32);
1247        else
1248                ret =  PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
1249                                  pud.pud);
1250
1251        return ret;
1252}
1253
1254static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
1255{
1256        pgdval_t val = native_pgd_val(pgd);
1257
1258        if (sizeof(pgdval_t) > sizeof(long))
1259                PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
1260                            val, (u64)val >> 32);
1261        else
1262                PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
1263                            val);
1264}
1265
1266static inline void pgd_clear(pgd_t *pgdp)
1267{
1268        set_pgd(pgdp, __pgd(0));
1269}
1270
1271static inline void pud_clear(pud_t *pudp)
1272{
1273        set_pud(pudp, __pud(0));
1274}
1275
1276#endif  /* PAGETABLE_LEVELS == 4 */
1277
1278#endif  /* PAGETABLE_LEVELS >= 3 */
1279
1280#ifdef CONFIG_X86_PAE
1281/* Special-case pte-setting operations for PAE, which can't update a
1282   64-bit pte atomically */
1283static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1284{
1285        PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
1286                    pte.pte, pte.pte >> 32);
1287}
1288
1289static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1290                                   pte_t *ptep, pte_t pte)
1291{
1292        /* 5 arg words */
1293        pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
1294}
1295
1296static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1297                             pte_t *ptep)
1298{
1299        PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
1300}
1301
1302static inline void pmd_clear(pmd_t *pmdp)
1303{
1304        PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
1305}
1306#else  /* !CONFIG_X86_PAE */
1307static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1308{
1309        set_pte(ptep, pte);
1310}
1311
1312static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1313                                   pte_t *ptep, pte_t pte)
1314{
1315        set_pte(ptep, pte);
1316}
1317
1318static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1319                             pte_t *ptep)
1320{
1321        set_pte_at(mm, addr, ptep, __pte(0));
1322}
1323
1324static inline void pmd_clear(pmd_t *pmdp)
1325{
1326        set_pmd(pmdp, __pmd(0));
1327}
1328#endif  /* CONFIG_X86_PAE */
1329
/*
 * Lazy mode for batching updates / context switch.
 * The current mode is queried with paravirt_get_lazy_mode().
 */
enum paravirt_lazy_mode {
        PARAVIRT_LAZY_NONE,
        PARAVIRT_LAZY_MMU,
        PARAVIRT_LAZY_CPU,
};

/* Lazy-mode helpers; only declared here, defined outside this header. */
enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1343
1344#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
1345static inline void arch_enter_lazy_cpu_mode(void)
1346{
1347        PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
1348}
1349
1350static inline void arch_leave_lazy_cpu_mode(void)
1351{
1352        PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
1353}
1354
1355static inline void arch_flush_lazy_cpu_mode(void)
1356{
1357        if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
1358                arch_leave_lazy_cpu_mode();
1359                arch_enter_lazy_cpu_mode();
1360        }
1361}
1362
1363
1364#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1365static inline void arch_enter_lazy_mmu_mode(void)
1366{
1367        PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
1368}
1369
1370static inline void arch_leave_lazy_mmu_mode(void)
1371{
1372        PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
1373}
1374
1375static inline void arch_flush_lazy_mmu_mode(void)
1376{
1377        if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
1378                arch_leave_lazy_mmu_mode();
1379                arch_enter_lazy_mmu_mode();
1380        }
1381}
1382
1383static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1384                                unsigned long phys, pgprot_t flags)
1385{
1386        pv_mmu_ops.set_fixmap(idx, phys, flags);
1387}
1388
1389void _paravirt_nop(void);
1390#define paravirt_nop    ((void *)_paravirt_nop)
1391
1392void paravirt_use_bytelocks(void);
1393
1394#ifdef CONFIG_SMP
1395
1396static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
1397{
1398        return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
1399}
1400
1401static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
1402{
1403        return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
1404}
1405
1406static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
1407{
1408        PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
1409}
1410
1411static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
1412                                                  unsigned long flags)
1413{
1414        PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
1415}
1416
1417static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
1418{
1419        return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
1420}
1421
1422static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
1423{
1424        PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
1425}
1426
1427#endif
1428
1429/* These all sit in the .parainstructions section to tell us what to patch. */
1430struct paravirt_patch_site {
1431        u8 *instr;              /* original instructions */
1432        u8 instrtype;           /* type of this instruction */
1433        u8 len;                 /* length of original instruction */
1434        u16 clobbers;           /* what registers you may clobber */
1435};
1436
1437extern struct paravirt_patch_site __parainstructions[],
1438        __parainstructions_end[];
1439
/*
 * Building blocks for the inline-asm interrupt-flag helpers below:
 *  PV_SAVE_REGS / PV_RESTORE_REGS  - asm text placed around the call
 *  PV_FLAGS_ARG                    - input constraint for the flags value
 *  PV_EXTRA_CLOBBERS / PV_VEXTRA_CLOBBERS - extra entries for the clobber
 *                                    list (empty on 32-bit)
 */
#ifdef CONFIG_X86_32
/* 32-bit: %ecx and %edx are pushed before the call and popped after it. */
#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
/* "0": the flags argument shares the register of output operand 0. */
#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/* We save some registers, but not all of them: that would be too much.
 * Instead we declare all caller-saved registers as clobbered, except the
 * argument register. */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
/* "D": the flags argument is passed in %rdi. */
#define PV_FLAGS_ARG "D"
#endif
1455
/*
 * Return the value produced by the pv_irq_ops.save_fl hook.
 *
 * The call is emitted as a patchable site (paravirt_alt) wrapped in
 * PV_SAVE_REGS/PV_RESTORE_REGS; the result comes back in the "a"
 * register and only CLBR_EAX is advertised to the patcher.
 */
static inline unsigned long __raw_local_save_flags(void)
{
        unsigned long f;

        asm volatile(paravirt_alt(PV_SAVE_REGS
                                  PARAVIRT_CALL
                                  PV_RESTORE_REGS)
                     : "=a"(f)
                     : paravirt_type(pv_irq_ops.save_fl),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "cc" PV_VEXTRA_CLOBBERS);
        return f;
}
1469
/*
 * Hand the saved flags value @f to the pv_irq_ops.restore_fl hook.
 *
 * @f is passed in the register selected by PV_FLAGS_ARG.  It is also
 * listed as an "=a" output, which tells the compiler that the "a"
 * register does not survive the call.
 */
static inline void raw_local_irq_restore(unsigned long f)
{
        asm volatile(paravirt_alt(PV_SAVE_REGS
                                  PARAVIRT_CALL
                                  PV_RESTORE_REGS)
                     : "=a"(f)
                     : PV_FLAGS_ARG(f),
                       paravirt_type(pv_irq_ops.restore_fl),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "cc" PV_EXTRA_CLOBBERS);
}
1481
/*
 * Call the pv_irq_ops.irq_disable hook through a patchable site.
 * There are no outputs; "eax" is listed as clobbered instead.
 */
static inline void raw_local_irq_disable(void)
{
        asm volatile(paravirt_alt(PV_SAVE_REGS
                                  PARAVIRT_CALL
                                  PV_RESTORE_REGS)
                     :
                     : paravirt_type(pv_irq_ops.irq_disable),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}

/*
 * Call the pv_irq_ops.irq_enable hook through a patchable site.
 * Same operand and clobber layout as raw_local_irq_disable().
 */
static inline void raw_local_irq_enable(void)
{
        asm volatile(paravirt_alt(PV_SAVE_REGS
                                  PARAVIRT_CALL
                                  PV_RESTORE_REGS)
                     :
                     : paravirt_type(pv_irq_ops.irq_enable),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}
1503
/*
 * Capture the current flags with __raw_local_save_flags(), then disable
 * interrupts, and return the captured value.
 */
static inline unsigned long __raw_local_irq_save(void)
{
        unsigned long saved = __raw_local_save_flags();

        raw_local_irq_disable();

        return saved;
}
1512
1513
/*
 * Make sure as little as possible of this mess escapes: the call-site
 * helper macros are undefined again once the wrappers above have been
 * defined.
 */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4
1528
1529#else  /* __ASSEMBLY__ */
1530
/*
 * _PVSITE - emit @ops between the local labels 771 and 772 and record
 * the site in the .parainstructions section.
 *
 * The record is aligned to @algn and consists of: the address of label
 * 771 (emitted with the @word directive), @ptype as a byte, the length
 * 772b-771b as a byte, and @clobbers as a short.  This mirrors the
 * instr / instrtype / len / clobbers fields of struct
 * paravirt_patch_site.
 */
#define _PVSITE(ptype, clobbers, ops, word, algn)       \
771:;                                           \
        ops;                                    \
772:;                                           \
        .pushsection .parainstructions,"a";     \
         .align algn;                           \
         word 771b;                             \
         .byte ptype;                           \
         .byte 772b-771b;                       \
         .short clobbers;                       \
        .popsection
1542
1543
/*
 * Assembler-side helpers, selected by word size:
 *  PV_SAVE_REGS / PV_RESTORE_REGS - push and pop (in reverse order) the
 *                                   registers preserved around a call
 *  PARA_PATCH(struct, off)        - patch type: byte offset scaled down
 *                                   by the pointer size (8 or 4)
 *  PARA_SITE(ptype, clobbers, ops)- _PVSITE with the matching address
 *                                   directive and alignment
 *  PARA_INDIRECT(addr)            - operand for an indirect call/jmp
 *                                   through the pointer stored at addr
 */
#ifdef CONFIG_X86_64
#define PV_SAVE_REGS                            \
        push %rax;                              \
        push %rcx;                              \
        push %rdx;                              \
        push %rsi;                              \
        push %rdi;                              \
        push %r8;                               \
        push %r9;                               \
        push %r10;                              \
        push %r11
#define PV_RESTORE_REGS                         \
        pop %r11;                               \
        pop %r10;                               \
        pop %r9;                                \
        pop %r8;                                \
        pop %rdi;                               \
        pop %rsi;                               \
        pop %rdx;                               \
        pop %rcx;                               \
        pop %rax
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr)     *addr(%rip)
#else
#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr)     *%cs:addr
#endif
1575
/*
 * Patchable site that jumps through the pv_cpu_ops iret slot.  No
 * registers are saved and CLBR_NONE is advertised to the patcher.
 */
#define INTERRUPT_RETURN                                                \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
1579
/*
 * Patchable site that calls the pv_irq_ops irq_disable slot with the
 * registers of PV_SAVE_REGS preserved around the call.  @clobbers is the
 * register mask recorded for the patcher.
 *
 * The definition ends on its last line: no trailing line continuation,
 * so it cannot swallow whatever follows it.
 */
#define DISABLE_INTERRUPTS(clobbers)                                    \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
                  PV_SAVE_REGS;                                         \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
                  PV_RESTORE_REGS;)
1585
/*
 * Patchable site that calls the pv_irq_ops irq_enable slot with the
 * registers of PV_SAVE_REGS preserved around the call.  @clobbers is the
 * register mask recorded for the patcher.
 */
#define ENABLE_INTERRUPTS(clobbers)                                     \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
                  PV_SAVE_REGS;                                         \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                  PV_RESTORE_REGS;)

/* Patchable site that jumps through the pv_cpu_ops usergs_sysret32 slot. */
#define USERGS_SYSRET32                                                 \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1596
1597#ifdef CONFIG_X86_32
/*
 * Call the pv_cpu_ops read_cr0 slot with %ecx and %edx preserved; as the
 * name says, the result is expected in %eax.  This is a plain indirect
 * call, not a PARA_SITE, so it is not recorded for patching.
 */
#define GET_CR0_INTO_EAX                                \
        push %ecx; push %edx;                           \
        call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
        pop %edx; pop %ecx

/* Patchable site that jumps through the pv_cpu_ops irq_enable_sysexit slot. */
#define ENABLE_INTERRUPTS_SYSEXIT                                       \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1607
1608
1609#else   /* !CONFIG_X86_32 */
1610
/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 *
 * SWAPGS_UNSAFE_STACK therefore emits the bare instruction inside a
 * patchable site instead of a call.
 */
#define SWAPGS_UNSAFE_STACK                                             \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
                  swapgs)

/*
 * Patchable site that calls the pv_cpu_ops swapgs slot with the
 * registers of PV_SAVE_REGS preserved around the call.
 */
#define SWAPGS                                                          \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
                  PV_SAVE_REGS;                                         \
                  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
                  PV_RESTORE_REGS                                       \
                 )

/*
 * Call the pv_mmu_ops read_cr2 slot, move the result from %rax into
 * %rcx and then zero %rax.  Plain indirect call, not a PARA_SITE.
 */
#define GET_CR2_INTO_RCX                                \
        call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
        movq %rax, %rcx;                                \
        xorq %rax, %rax;

/* Patchable site that calls the pv_irq_ops adjust_exception_frame slot. */
#define PARAVIRT_ADJUST_EXCEPTION_FRAME                                 \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
                  CLBR_NONE,                                            \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

/* Patchable site that jumps through the pv_cpu_ops usergs_sysret64 slot. */
#define USERGS_SYSRET64                                                 \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

/* Patchable site that jumps through the pv_cpu_ops irq_enable_sysexit slot. */
#define ENABLE_INTERRUPTS_SYSEXIT32                                     \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1646#endif  /* CONFIG_X86_32 */
1647
1648#endif /* __ASSEMBLY__ */
1649#endif /* CONFIG_PARAVIRT */
1650#endif /* _ASM_X86_PARAVIRT_H */
1651
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.