linux/arch/x86/xen/smp.c
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

static void __cpuinit xen_init_lock_cpu(int cpu);

cpumask_t xen_cpu_initialized_map;

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
static DEFINE_PER_CPU(int, callfuncsingle_irq);
static DEFINE_PER_CPU(int, debug_irq) = -1;

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
#ifdef CONFIG_X86_32
        __get_cpu_var(irq_stat).irq_resched_count++;
#else
        add_pda(irq_resched_count, 1);
#endif

        return IRQ_HANDLED;
}

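/*
 * Entry point for a freshly started secondary vcpu: finish per-cpu
 * setup, mark this cpu online and then drop into the idle loop.
 */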
static __cpuinit void cpu_bringup_and_idle(void)
{
        int cpu = smp_processor_id();

        cpu_init();
        preempt_disable();

        xen_enable_sysenter();
        xen_enable_syscall();

        cpu = smp_processor_id();
        smp_store_cpu_info(cpu);
        cpu_data(cpu).x86_max_cores = 1;
        set_cpu_sibling_map(cpu);

        xen_setup_cpu_clockevents();

        cpu_set(cpu, cpu_online_map);
        x86_write_percpu(cpu_state, CPU_ONLINE);
        wmb();

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();

        wmb();                  /* make sure everything is out */
        cpu_idle();
}

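/*
 * Bind the per-cpu IPI and debug event channels for @cpu.  On failure
 * the channels that were already bound are torn down again and the
 * error is returned.
 */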
static int xen_smp_intr_init(unsigned int cpu)
{
        int rc;
        const char *resched_name, *callfunc_name, *debug_name;

        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
                                    cpu,
                                    xen_reschedule_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    resched_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(resched_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                    cpu,
                                    xen_call_function_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(callfunc_irq, cpu) = rc;

        debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
        rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
                                     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
                                     debug_name, NULL);
        if (rc < 0)
                goto fail;
        per_cpu(debug_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
                                    cpu,
                                    xen_call_function_single_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(callfuncsingle_irq, cpu) = rc;

        return 0;

 fail:
        if (per_cpu(resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        if (per_cpu(callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
        if (per_cpu(debug_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
        if (per_cpu(callfuncsingle_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);

        return rc;
}

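/*
 * Ask the hypervisor which vcpus exist and mark each of them as a
 * possible cpu.
 */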
static void __init xen_fill_possible_map(void)
{
        int i, rc;

        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        cpu_set(i, cpu_possible_map);
                }
        }
}

static void __init xen_smp_prepare_boot_cpu(void)
{
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        /* We've switched to the "real" per-cpu gdt, so make sure the
           old memory can be recycled */
        make_lowmem_page_readwrite(&per_cpu_var(gdt_page));

        xen_setup_vcpu_info_placement();
}

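/*
 * Prepare the boot cpu's SMP state, trim the possible map down to
 * max_cpus and mark the remaining cpus present so they can be
 * brought up later.
 */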
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned cpu;

        xen_init_lock_cpu(0);

        smp_store_cpu_info(0);
        cpu_data(0).x86_max_cores = 1;
        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        xen_cpu_initialized_map = cpumask_of_cpu(0);

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
                        continue;
                cpu_clear(cpu, cpu_possible_map);
        }

        for_each_possible_cpu (cpu) {
                struct task_struct *idle;

                if (cpu == 0)
                        continue;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

                cpu_set(cpu, cpu_present_map);
        }

        //init_xenbus_allowed_cpumask();
}

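/*
 * Construct the initial vcpu_guest_context for @cpu (register state,
 * GDT, callbacks and pagetable base) and register it with the
 * hypervisor via VCPUOP_initialise.
 */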
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct desc_struct *gdt;

        if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL)
                return -ENOMEM;

        gdt = get_cpu_gdt_table(cpu);

        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
        ctxt->user_regs.fs = __KERNEL_PERCPU;
#endif
        ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

        memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

        xen_copy_trap_info(ctxt->trap_ctxt);

        ctxt->ldt_ents = 0;

        BUG_ON((unsigned long)gdt & ~PAGE_MASK);
        make_lowmem_page_readonly(gdt);

        ctxt->gdt_frames[0] = virt_to_mfn(gdt);
        ctxt->gdt_ents      = GDT_ENTRIES;

        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
        ctxt->event_callback_cs     = __KERNEL_CS;
        ctxt->failsafe_callback_cs  = __KERNEL_CS;
#endif
        ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
        ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}

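/*
 * Bring secondary cpu @cpu up: set up its per-cpu data, timer, lock
 * kicker and interrupts, hand its initial context to Xen, then unpause
 * the vcpu and wait until it has marked itself online.
 */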
static int __cpuinit xen_cpu_up(unsigned int cpu)
{
        struct task_struct *idle = idle_task(cpu);
        int rc;

#if 0
        rc = cpu_up_check(cpu);
        if (rc)
                return rc;
#endif

#ifdef CONFIG_X86_64
        /* Allocate node local memory for AP pdas */
        WARN_ON(cpu == 0);
        if (cpu > 0) {
                rc = get_local_pda(cpu);
                if (rc)
                        return rc;
        }
#endif

#ifdef CONFIG_X86_32
        init_gdt(cpu);
        per_cpu(current_task, cpu) = idle;
        irq_ctx_init(cpu);
#else
        cpu_pda(cpu)->pcurrent = idle;
        clear_tsk_thread_flag(idle, TIF_FORK);
#endif
        xen_setup_timer(cpu);
        xen_init_lock_cpu(cpu);

        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        while (per_cpu(cpu_state, cpu) != CPU_ONLINE) {
                HYPERVISOR_sched_op(SCHEDOP_yield, 0);
                barrier();
        }

        return 0;
}

static void xen_smp_cpus_done(unsigned int max_cpus)
{
}

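/*
 * Run on every cpu by xen_smp_send_stop(): switch away from any
 * process pagetable and ask Xen to take this vcpu down.
 */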
static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
        BUG();
}

static void xen_smp_send_stop(void)
{
        smp_call_function(stop_self, NULL, 0);
}

static void xen_smp_send_reschedule(int cpu)
{
        xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

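/* Deliver @vector to every online cpu in @mask via its event channel. */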
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
        unsigned cpu;

        cpus_and(mask, mask, cpu_online_map);

        for_each_cpu_mask_nr(cpu, mask)
                xen_send_IPI_one(cpu, vector);
}

static void xen_smp_send_call_function_ipi(cpumask_t mask)
{
        int cpu;

        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

        /* Make sure other vcpus get a chance to run if they need to. */
        for_each_cpu_mask_nr(cpu, mask) {
                if (xen_vcpu_stolen(cpu)) {
                        HYPERVISOR_sched_op(SCHEDOP_yield, 0);
                        break;
                }
        }
}

static void xen_smp_send_call_function_single_ipi(int cpu)
{
        xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
        __get_cpu_var(irq_stat).irq_call_count++;
#else
        add_pda(irq_call_count, 1);
#endif
        irq_exit();

        return IRQ_HANDLED;
}

static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_single_interrupt();
#ifdef CONFIG_X86_32
        __get_cpu_var(irq_stat).irq_call_count++;
#else
        add_pda(irq_call_count, 1);
#endif
        irq_exit();

        return IRQ_HANDLED;
}

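/*
 * Xen paravirtual spinlocks: a lock byte plus a count of cpus that
 * have entered the slow path, so an unlocker knows when somebody
 * needs to be kicked awake.
 */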
struct xen_spinlock {
        unsigned char lock;             /* 0 -> free; 1 -> locked */
        unsigned short spinners;        /* count of waiting cpus */
};

static int xen_spin_is_locked(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        return xl->lock != 0;
}

static int xen_spin_is_contended(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        /* Not strictly true; this is only the count of contended
           lock-takers entering the slow path. */
        return xl->spinners != 0;
}

static int xen_spin_trylock(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        u8 old = 1;

        asm("xchgb %b0,%1"
            : "+q" (old), "+m" (xl->lock) : : "memory");

        return old == 0;
}

static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

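/*
 * Record (and later clear) which lock this cpu is spinning on, and
 * keep the spinner count up to date so the unlock path knows whether
 * to send a kick.
 */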
static inline void spinning_lock(struct xen_spinlock *xl)
{
        __get_cpu_var(lock_spinners) = xl;
        wmb();                  /* set lock of interest before count */
        asm(LOCK_PREFIX " incw %0"
            : "+m" (xl->spinners) : : "memory");
}

static inline void unspinning_lock(struct xen_spinlock *xl)
{
        asm(LOCK_PREFIX " decw %0"
            : "+m" (xl->spinners) : : "memory");
        wmb();                  /* decrement count before clearing lock */
        __get_cpu_var(lock_spinners) = NULL;
}

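/*
 * Slow path: announce that we're spinning on this lock, then block on
 * the per-cpu kicker event channel until the current holder sends a
 * wakeup (or the lock turns out to be free after all).
 */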
static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        int irq = __get_cpu_var(lock_kicker_irq);
        int ret;

        /* If kicker interrupts not initialized yet, just spin */
        if (irq == -1)
                return 0;

        /* announce we're spinning */
        spinning_lock(xl);

        /* clear pending */
        xen_clear_irq_pending(irq);

        /* check again make sure it didn't become free while
           we weren't looking  */
        ret = xen_spin_trylock(lock);
        if (ret)
                goto out;

        /* block until irq becomes pending */
        xen_poll_irq(irq);
        kstat_this_cpu.irqs[irq]++;

out:
        unspinning_lock(xl);
        return ret;
}

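/*
 * Fast path: spin for a bounded number of iterations trying to grab
 * the lock byte, falling back to xen_spin_lock_slow() when the budget
 * runs out.
 */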
static void xen_spin_lock(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        int timeout;
        u8 oldval;

        do {
                timeout = 1 << 10;

                asm("1: xchgb %1,%0\n"
                    "   testb %1,%1\n"
                    "   jz 3f\n"
                    "2: rep;nop\n"
                    "   cmpb $0,%0\n"
                    "   je 1b\n"
                    "   dec %2\n"
                    "   jnz 2b\n"
                    "3:\n"
                    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
                    : "1" (1)
                    : "memory");

        } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
}

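/* Kick the first cpu we find that is spinning on this lock. */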
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
        int cpu;

        for_each_online_cpu(cpu) {
                /* XXX should mix up next cpu selection */
                if (per_cpu(lock_spinners, cpu) == xl) {
                        xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
                        break;
                }
        }
}

static void xen_spin_unlock(struct raw_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        smp_wmb();              /* make sure no writes get moved after unlock */
        xl->lock = 0;           /* release lock */

        /* make sure unlock happens before kick */
        barrier();

        if (unlikely(xl->spinners))
                xen_spin_unlock_slow(xl);
}

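/*
 * Set up the per-cpu spinlock kicker event channel.  The irq is kept
 * disabled and is only ever waited on with xen_poll_irq(), so the
 * handler passed here is never actually invoked.
 */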
static __cpuinit void xen_init_lock_cpu(int cpu)
{
        int irq;
        const char *name;

        name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
        irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
                                     cpu,
                                     xen_reschedule_interrupt,
                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                     name,
                                     NULL);

        if (irq >= 0) {
                disable_irq(irq); /* make sure it's never delivered */
                per_cpu(lock_kicker_irq, cpu) = irq;
        }

        printk("cpu %d spinlock event irq %d\n", cpu, irq);
}

static void __init xen_init_spinlocks(void)
{
        pv_lock_ops.spin_is_locked = xen_spin_is_locked;
        pv_lock_ops.spin_is_contended = xen_spin_is_contended;
        pv_lock_ops.spin_lock = xen_spin_lock;
        pv_lock_ops.spin_trylock = xen_spin_trylock;
        pv_lock_ops.spin_unlock = xen_spin_unlock;
}

static const struct smp_ops xen_smp_ops __initdata = {
        .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
        .smp_prepare_cpus = xen_smp_prepare_cpus,
        .cpu_up = xen_cpu_up,
        .smp_cpus_done = xen_smp_cpus_done,

        .smp_send_stop = xen_smp_send_stop,
        .smp_send_reschedule = xen_smp_send_reschedule,

        .send_call_func_ipi = xen_smp_send_call_function_ipi,
        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

void __init xen_smp_init(void)
{
        smp_ops = xen_smp_ops;
        xen_fill_possible_map();
        xen_init_spinlocks();
}