linux/arch/x86/xen/spinlock.c
<<
>>
Prefs
   1/*
   2 * Split spinlock implementation out into its own file, so it can be
   3 * compiled in a FTRACE-compatible way.
   4 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "debugfs.h"
  18
  19#ifdef CONFIG_XEN_DEBUG_FS
  20static struct xen_spinlock_stats
  21{
  22        u64 taken;
  23        u32 taken_slow;
  24        u32 taken_slow_nested;
  25        u32 taken_slow_pickup;
  26        u32 taken_slow_spurious;
  27        u32 taken_slow_irqenable;
  28
  29        u64 released;
  30        u32 released_slow;
  31        u32 released_slow_kicked;
  32
  33#define HISTO_BUCKETS   30
  34        u32 histo_spin_total[HISTO_BUCKETS+1];
  35        u32 histo_spin_spinning[HISTO_BUCKETS+1];
  36        u32 histo_spin_blocked[HISTO_BUCKETS+1];
  37
  38        u64 time_total;
  39        u64 time_spinning;
  40        u64 time_blocked;
  41} spinlock_stats;
  42
  43static u8 zero_stats;
  44
  45static unsigned lock_timeout = 1 << 10;
  46#define TIMEOUT lock_timeout
  47
  48static inline void check_zero(void)
  49{
  50        if (unlikely(zero_stats)) {
  51                memset(&spinlock_stats, 0, sizeof(spinlock_stats));
  52                zero_stats = 0;
  53        }
  54}
  55
  56#define ADD_STATS(elem, val)                    \
  57        do { check_zero(); spinlock_stats.elem += (val); } while(0)
  58
  59static inline u64 spin_time_start(void)
  60{
  61        return xen_clocksource_read();
  62}
  63
  64static void __spin_time_accum(u64 delta, u32 *array)
  65{
  66        unsigned index = ilog2(delta);
  67
  68        check_zero();
  69
  70        if (index < HISTO_BUCKETS)
  71                array[index]++;
  72        else
  73                array[HISTO_BUCKETS]++;
  74}
  75
  76static inline void spin_time_accum_spinning(u64 start)
  77{
  78        u32 delta = xen_clocksource_read() - start;
  79
  80        __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
  81        spinlock_stats.time_spinning += delta;
  82}
  83
  84static inline void spin_time_accum_total(u64 start)
  85{
  86        u32 delta = xen_clocksource_read() - start;
  87
  88        __spin_time_accum(delta, spinlock_stats.histo_spin_total);
  89        spinlock_stats.time_total += delta;
  90}
  91
  92static inline void spin_time_accum_blocked(u64 start)
  93{
  94        u32 delta = xen_clocksource_read() - start;
  95
  96        __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
  97        spinlock_stats.time_blocked += delta;
  98}
  99#else  /* !CONFIG_XEN_DEBUG_FS */
 100#define TIMEOUT                 (1 << 10)
 101#define ADD_STATS(elem, val)    do { (void)(val); } while(0)
 102
 103static inline u64 spin_time_start(void)
 104{
 105        return 0;
 106}
 107
 108static inline void spin_time_accum_total(u64 start)
 109{
 110}
 111static inline void spin_time_accum_spinning(u64 start)
 112{
 113}
 114static inline void spin_time_accum_blocked(u64 start)
 115{
 116}
 117#endif  /* CONFIG_XEN_DEBUG_FS */
 118
 119/*
 120 * Size struct xen_spinlock so it's the same as arch_spinlock_t.
 121 */
 122#if NR_CPUS < 256
 123typedef u8 xen_spinners_t;
 124# define inc_spinners(xl) \
 125        asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory");
 126# define dec_spinners(xl) \
 127        asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory");
 128#else
 129typedef u16 xen_spinners_t;
 130# define inc_spinners(xl) \
 131        asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory");
 132# define dec_spinners(xl) \
 133        asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 134#endif
 135
 136struct xen_spinlock {
 137        unsigned char lock;             /* 0 -> free; 1 -> locked */
 138        xen_spinners_t spinners;        /* count of waiting cpus */
 139};
 140
 141static int xen_spin_is_locked(struct arch_spinlock *lock)
 142{
 143        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 144
 145        return xl->lock != 0;
 146}
 147
 148static int xen_spin_is_contended(struct arch_spinlock *lock)
 149{
 150        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 151
 152        /* Not strictly true; this is only the count of contended
 153           lock-takers entering the slow path. */
 154        return xl->spinners != 0;
 155}
 156
 157static int xen_spin_trylock(struct arch_spinlock *lock)
 158{
 159        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 160        u8 old = 1;
 161
 162        asm("xchgb %b0,%1"
 163            : "+q" (old), "+m" (xl->lock) : : "memory");
 164
 165        return old == 0;
 166}
 167
 168static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 169static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 170
 171/*
 172 * Mark a cpu as interested in a lock.  Returns the CPU's previous
 173 * lock of interest, in case we got preempted by an interrupt.
 174 */
 175static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
 176{
 177        struct xen_spinlock *prev;
 178
 179        prev = __this_cpu_read(lock_spinners);
 180        __this_cpu_write(lock_spinners, xl);
 181
 182        wmb();                  /* set lock of interest before count */
 183
 184        inc_spinners(xl);
 185
 186        return prev;
 187}
 188
 189/*
 190 * Mark a cpu as no longer interested in a lock.  Restores previous
 191 * lock of interest (NULL for none).
 192 */
 193static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
 194{
 195        dec_spinners(xl);
 196        wmb();                  /* decrement count before restoring lock */
 197        __this_cpu_write(lock_spinners, prev);
 198}
 199
/*
 * Slow path of lock acquisition: block in the hypervisor until kicked.
 *
 * @lock:       the contended lock
 * @irq_enable: if true, interrupts are enabled while blocking and the
 *              saved flags are restored afterwards
 *
 * Returns non-zero if the lock was obtained here (via xen_spin_trylock()),
 * and 0 otherwise, including the case where this cpu's kicker irq has not
 * been set up yet.  On a 0 return the lock is NOT held; the caller,
 * __xen_spin_lock(), goes back to spinning.
 */
 200static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
 201{
 202        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 203        struct xen_spinlock *prev;
 204        int irq = __this_cpu_read(lock_kicker_irq);
 205        int ret;
 206        u64 start;
 207
 208        /* If kicker interrupts not initialized yet, just spin */
 209        if (irq == -1)
 210                return 0;
 211
 212        start = spin_time_start();
 213
 214        /* announce we're spinning */
 215        prev = spinning_lock(xl);
 216
 217        ADD_STATS(taken_slow, 1);
        /* prev is non-NULL when this call nested inside another slow-path wait */
 218        ADD_STATS(taken_slow_nested, prev != NULL);
 219
 220        do {
 221                unsigned long flags;
 222
 223                /* clear pending */
 224                xen_clear_irq_pending(irq);
 225
 226                /* check again make sure it didn't become free while
 227                   we weren't looking  */
 228                ret = xen_spin_trylock(lock);
 229                if (ret) {
 230                        ADD_STATS(taken_slow_pickup, 1);
 231
 232                        /*
 233                         * If we interrupted another spinlock while it
 234                         * was blocking, make sure it doesn't block
 235                         * without rechecking the lock.
 236                         */
 237                        if (prev != NULL)
 238                                xen_set_irq_pending(irq);
 239                        goto out;
 240                }
 241
                /* remember the current irq state so it can be restored after polling */
 242                flags = arch_local_save_flags();
 243                if (irq_enable) {
 244                        ADD_STATS(taken_slow_irqenable, 1);
 245                        raw_local_irq_enable();
 246                }
 247
 248                /*
 249                 * Block until irq becomes pending.  If we're
 250                 * interrupted at this point (after the trylock but
 251                 * before entering the block), then the nested lock
 252                 * handler guarantees that the irq will be left
 253                 * pending if there's any chance the lock became free;
 254                 * xen_poll_irq() returns immediately if the irq is
 255                 * pending.
 256                 */
 257                xen_poll_irq(irq);
 258
 259                raw_local_irq_restore(flags);
 260
 261                ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 262        } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 263
        /*
         * Reached only after a kick; ret is still 0 from the failed
         * trylock above, so the caller will retry the fast path.
         */
 264        kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 265
 266out:
        /* drop our registration and restore any interrupted waiter's lock */
 267        unspinning_lock(xl, prev);
 268        spin_time_accum_blocked(start);
 269
 270        return ret;
 271}
 272
/*
 * Acquire @lock, spinning first and blocking in the slow path if needed.
 *
 * @lock:       the lock to take
 * @irq_enable: forwarded to xen_spin_lock_slow(); whether interrupts may
 *              be enabled while blocked
 *
 * Each pass of the outer loop runs the asm fast path with a budget of
 * TIMEOUT iterations:
 *   1: exchange 1 into the lock byte; if the old value was 0 the lock is
 *      ours (jump to 3).
 *   2: otherwise pause (rep;nop) and watch the lock byte; as soon as it
 *      reads 0 go back to 1 and retry the exchange, else decrement the
 *      budget and keep watching until it reaches zero.
 *   3: done; oldval is 0 on success and non-zero if the budget ran out.
 *
 * On failure the loop repeats either unconditionally (TIMEOUT == ~0, i.e.
 * never block) or when xen_spin_lock_slow() returns 0, meaning it did not
 * obtain the lock itself.
 */
 273static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
 274{
 275        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 276        unsigned timeout;
 277        u8 oldval;
 278        u64 start_spin;
 279
 280        ADD_STATS(taken, 1);
 281
        /* start of the whole acquisition, for the "total" statistics */
 282        start_spin = spin_time_start();
 283
 284        do {
                /* start of this fast-path attempt, for the "spinning" statistics */
 285                u64 start_spin_fast = spin_time_start();
 286
 287                timeout = TIMEOUT;
 288
 289                asm("1: xchgb %1,%0\n"
 290                    "   testb %1,%1\n"
 291                    "   jz 3f\n"
 292                    "2: rep;nop\n"
 293                    "   cmpb $0,%0\n"
 294                    "   je 1b\n"
 295                    "   dec %2\n"
 296                    "   jnz 2b\n"
 297                    "3:\n"
 298                    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
 299                    : "1" (1)
 300                    : "memory");
 301
 302                spin_time_accum_spinning(start_spin_fast);
 303
 304        } while (unlikely(oldval != 0 &&
 305                          (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
 306
 307        spin_time_accum_total(start_spin);
 308}
 309
 310static void xen_spin_lock(struct arch_spinlock *lock)
 311{
 312        __xen_spin_lock(lock, false);
 313}
 314
 315static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
 316{
 317        __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
 318}
 319
 320static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 321{
 322        int cpu;
 323
 324        ADD_STATS(released_slow, 1);
 325
 326        for_each_online_cpu(cpu) {
 327                /* XXX should mix up next cpu selection */
 328                if (per_cpu(lock_spinners, cpu) == xl) {
 329                        ADD_STATS(released_slow_kicked, 1);
 330                        xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 331                        break;
 332                }
 333        }
 334}
 335
 336static void xen_spin_unlock(struct arch_spinlock *lock)
 337{
 338        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 339
 340        ADD_STATS(released, 1);
 341
 342        smp_wmb();              /* make sure no writes get moved after unlock */
 343        xl->lock = 0;           /* release lock */
 344
 345        /*
 346         * Make sure unlock happens before checking for waiting
 347         * spinners.  We need a strong barrier to enforce the
 348         * write-read ordering to different memory locations, as the
 349         * CPU makes no implied guarantees about their ordering.
 350         */
 351        mb();
 352
 353        if (unlikely(xl->spinners))
 354                xen_spin_unlock_slow(xl);
 355}
 356
 357static irqreturn_t dummy_handler(int irq, void *dev_id)
 358{
 359        BUG();
 360        return IRQ_HANDLED;
 361}
 362
 363void __cpuinit xen_init_lock_cpu(int cpu)
 364{
 365        int irq;
 366        const char *name;
 367
 368        name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
 369        irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
 370                                     cpu,
 371                                     dummy_handler,
 372                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
 373                                     name,
 374                                     NULL);
 375
 376        if (irq >= 0) {
 377                disable_irq(irq); /* make sure it's never delivered */
 378                per_cpu(lock_kicker_irq, cpu) = irq;
 379        }
 380
 381        printk("cpu %d spinlock event irq %d\n", cpu, irq);
 382}
 383
 384void xen_uninit_lock_cpu(int cpu)
 385{
 386        unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
 387}
 388
 389void __init xen_init_spinlocks(void)
 390{
 391        BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
 392
 393        pv_lock_ops.spin_is_locked = xen_spin_is_locked;
 394        pv_lock_ops.spin_is_contended = xen_spin_is_contended;
 395        pv_lock_ops.spin_lock = xen_spin_lock;
 396        pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
 397        pv_lock_ops.spin_trylock = xen_spin_trylock;
 398        pv_lock_ops.spin_unlock = xen_spin_unlock;
 399}
 400
 401#ifdef CONFIG_XEN_DEBUG_FS
 402
 403static struct dentry *d_spin_debug;
 404
 405static int __init xen_spinlock_debugfs(void)
 406{
 407        struct dentry *d_xen = xen_init_debugfs();
 408
 409        if (d_xen == NULL)
 410                return -ENOMEM;
 411
 412        d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
 413
 414        debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 415
 416        debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
 417
 418        debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 419        debugfs_create_u32("taken_slow", 0444, d_spin_debug,
 420                           &spinlock_stats.taken_slow);
 421        debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
 422                           &spinlock_stats.taken_slow_nested);
 423        debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
 424                           &spinlock_stats.taken_slow_pickup);
 425        debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
 426                           &spinlock_stats.taken_slow_spurious);
 427        debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
 428                           &spinlock_stats.taken_slow_irqenable);
 429
 430        debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 431        debugfs_create_u32("released_slow", 0444, d_spin_debug,
 432                           &spinlock_stats.released_slow);
 433        debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
 434                           &spinlock_stats.released_slow_kicked);
 435
 436        debugfs_create_u64("time_spinning", 0444, d_spin_debug,
 437                           &spinlock_stats.time_spinning);
 438        debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 439                           &spinlock_stats.time_blocked);
 440        debugfs_create_u64("time_total", 0444, d_spin_debug,
 441                           &spinlock_stats.time_total);
 442
 443        xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
 444                                     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
 445        xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
 446                                     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 447        xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 448                                     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 449
 450        return 0;
 451}
 452fs_initcall(xen_spinlock_debugfs);
 453
 454#endif  /* CONFIG_XEN_DEBUG_FS */
 455
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.