linux/kernel/hrtimer.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/hrtimer.c
   3 *
   4 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   5 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   6 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
   7 *
   8 *  High-resolution kernel timers
   9 *
  10 *  In contrast to the low-resolution timeout API implemented in
  11 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
  12 *  depending on system configuration and capabilities.
  13 *
  14 *  These timers are currently used for:
  15 *   - itimers
  16 *   - POSIX timers
  17 *   - nanosleep
  18 *   - precise in-kernel timing
  19 *
  20 *  Started by: Thomas Gleixner and Ingo Molnar
  21 *
  22 *  Credits:
  23 *      based on kernel/timer.c
  24 *
  25 *      Help, testing, suggestions, bugfixes, improvements were
  26 *      provided by:
  27 *
  28 *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
  29 *      et. al.
  30 *
  31 *  For licencing details see kernel-base/COPYING
  32 */
  33
  34#include <linux/cpu.h>
  35#include <linux/irq.h>
  36#include <linux/module.h>
  37#include <linux/percpu.h>
  38#include <linux/hrtimer.h>
  39#include <linux/notifier.h>
  40#include <linux/syscalls.h>
  41#include <linux/kallsyms.h>
  42#include <linux/interrupt.h>
  43#include <linux/tick.h>
  44#include <linux/seq_file.h>
  45#include <linux/err.h>
  46#include <linux/debugobjects.h>
  47
  48#include <asm/uaccess.h>
  49
  50/**
  51 * ktime_get - get the monotonic time in ktime_t format
  52 *
  53 * returns the time in ktime_t format
  54 */
  55ktime_t ktime_get(void)
  56{
  57        struct timespec now;
  58
  59        ktime_get_ts(&now);
  60
  61        return timespec_to_ktime(now);
  62}
  63EXPORT_SYMBOL_GPL(ktime_get);
  64
  65/**
  66 * ktime_get_real - get the real (wall-) time in ktime_t format
  67 *
  68 * returns the time in ktime_t format
  69 */
  70ktime_t ktime_get_real(void)
  71{
  72        struct timespec now;
  73
  74        getnstimeofday(&now);
  75
  76        return timespec_to_ktime(now);
  77}
  78
  79EXPORT_SYMBOL_GPL(ktime_get_real);
  80
  81/*
  82 * The timer bases:
  83 *
  84 * Note: If we want to add new timer bases, we have to skip the two
  85 * clock ids captured by the cpu-timers. We do this by holding empty
  86 * entries rather than doing math adjustment of the clock ids.
  87 * This ensures that we capture erroneous accesses to these clock ids
  88 * rather than moving them into the range of valid clock id's.
  89 */
  90DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
  91{
  92
  93        .clock_base =
  94        {
  95                {
  96                        .index = CLOCK_REALTIME,
  97                        .get_time = &ktime_get_real,
  98                        .resolution = KTIME_LOW_RES,
  99                },
 100                {
 101                        .index = CLOCK_MONOTONIC,
 102                        .get_time = &ktime_get,
 103                        .resolution = KTIME_LOW_RES,
 104                },
 105        }
 106};
 107
 108/**
 109 * ktime_get_ts - get the monotonic clock in timespec format
 110 * @ts:         pointer to timespec variable
 111 *
 112 * The function calculates the monotonic clock from the realtime
 113 * clock and the wall_to_monotonic offset and stores the result
 114 * in normalized timespec format in the variable pointed to by @ts.
 115 */
 116void ktime_get_ts(struct timespec *ts)
 117{
 118        struct timespec tomono;
 119        unsigned long seq;
 120
 121        do {
 122                seq = read_seqbegin(&xtime_lock);
 123                getnstimeofday(ts);
 124                tomono = wall_to_monotonic;
 125
 126        } while (read_seqretry(&xtime_lock, seq));
 127
 128        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
 129                                ts->tv_nsec + tomono.tv_nsec);
 130}
 131EXPORT_SYMBOL_GPL(ktime_get_ts);
 132
 133/*
 134 * Get the coarse grained time at the softirq based on xtime and
 135 * wall_to_monotonic.
 136 */
 137static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 138{
 139        ktime_t xtim, tomono;
 140        struct timespec xts, tom;
 141        unsigned long seq;
 142
 143        do {
 144                seq = read_seqbegin(&xtime_lock);
 145                xts = current_kernel_time();
 146                tom = wall_to_monotonic;
 147        } while (read_seqretry(&xtime_lock, seq));
 148
 149        xtim = timespec_to_ktime(xts);
 150        tomono = timespec_to_ktime(tom);
 151        base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 152        base->clock_base[CLOCK_MONOTONIC].softirq_time =
 153                ktime_add(xtim, tomono);
 154}
 155
 156/*
 157 * Functions and macros which are different for UP/SMP systems are kept in a
 158 * single place
 159 */
 160#ifdef CONFIG_SMP
 161
 162/*
 163 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 164 * means that all timers which are tied to this base via timer->base are
 165 * locked, and the base itself is locked too.
 166 *
 167 * So __run_timers/migrate_timers can safely modify all timers which could
 168 * be found on the lists/queues.
 169 *
 170 * When the timer's base is locked, and the timer removed from list, it is
 171 * possible to set timer->base = NULL and drop the lock: the timer remains
 172 * locked.
 173 */
 174static
 175struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 176                                             unsigned long *flags)
 177{
 178        struct hrtimer_clock_base *base;
 179
 180        for (;;) {
 181                base = timer->base;
 182                if (likely(base != NULL)) {
 183                        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 184                        if (likely(base == timer->base))
 185                                return base;
 186                        /* The timer has migrated to another CPU: */
 187                        spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 188                }
 189                cpu_relax();
 190        }
 191}
 192
 193/*
 194 * Switch the timer base to the current CPU when possible.
 195 */
 196static inline struct hrtimer_clock_base *
 197switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 198{
 199        struct hrtimer_clock_base *new_base;
 200        struct hrtimer_cpu_base *new_cpu_base;
 201
 202        new_cpu_base = &__get_cpu_var(hrtimer_bases);
 203        new_base = &new_cpu_base->clock_base[base->index];
 204
 205        if (base != new_base) {
 206                /*
 207                 * We are trying to schedule the timer on the local CPU.
 208                 * However we can't change timer's base while it is running,
 209                 * so we keep it on the same CPU. No hassle vs. reprogramming
 210                 * the event source in the high resolution case. The softirq
 211                 * code will take care of this when the timer function has
 212                 * completed. There is no conflict as we hold the lock until
 213                 * the timer is enqueued.
 214                 */
 215                if (unlikely(hrtimer_callback_running(timer)))
 216                        return base;
 217
 218                /* See the comment in lock_timer_base() */
 219                timer->base = NULL;
 220                spin_unlock(&base->cpu_base->lock);
 221                spin_lock(&new_base->cpu_base->lock);
 222                timer->base = new_base;
 223        }
 224        return new_base;
 225}
 226
 227#else /* CONFIG_SMP */
 228
 229static inline struct hrtimer_clock_base *
 230lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 231{
 232        struct hrtimer_clock_base *base = timer->base;
 233
 234        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 235
 236        return base;
 237}
 238
 239# define switch_hrtimer_base(t, b)      (b)
 240
 241#endif  /* !CONFIG_SMP */
 242
 243/*
 244 * Functions for the union type storage format of ktime_t which are
 245 * too large for inlining:
 246 */
 247#if BITS_PER_LONG < 64
 248# ifndef CONFIG_KTIME_SCALAR
 249/**
 250 * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
 251 * @kt:         addend
 252 * @nsec:       the scalar nsec value to add
 253 *
 254 * Returns the sum of kt and nsec in ktime_t format
 255 */
 256ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 257{
 258        ktime_t tmp;
 259
 260        if (likely(nsec < NSEC_PER_SEC)) {
 261                tmp.tv64 = nsec;
 262        } else {
 263                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 264
 265                tmp = ktime_set((long)nsec, rem);
 266        }
 267
 268        return ktime_add(kt, tmp);
 269}
 270
 271EXPORT_SYMBOL_GPL(ktime_add_ns);
 272
 273/**
 274 * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
 275 * @kt:         minuend
 276 * @nsec:       the scalar nsec value to subtract
 277 *
 278 * Returns the subtraction of @nsec from @kt in ktime_t format
 279 */
 280ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
 281{
 282        ktime_t tmp;
 283
 284        if (likely(nsec < NSEC_PER_SEC)) {
 285                tmp.tv64 = nsec;
 286        } else {
 287                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 288
 289                tmp = ktime_set((long)nsec, rem);
 290        }
 291
 292        return ktime_sub(kt, tmp);
 293}
 294
 295EXPORT_SYMBOL_GPL(ktime_sub_ns);
 296# endif /* !CONFIG_KTIME_SCALAR */
 297
 298/*
 299 * Divide a ktime value by a nanosecond value
 300 */
 301u64 ktime_divns(const ktime_t kt, s64 div)
 302{
 303        u64 dclc, inc, dns;
 304        int sft = 0;
 305
 306        dclc = dns = ktime_to_ns(kt);
 307        inc = div;
 308        /* Make sure the divisor is less than 2^32: */
 309        while (div >> 32) {
 310                sft++;
 311                div >>= 1;
 312        }
 313        dclc >>= sft;
 314        do_div(dclc, (unsigned long) div);
 315
 316        return dclc;
 317}
 318#endif /* BITS_PER_LONG >= 64 */
 319
 320/*
 321 * Add two ktime values and do a safety check for overflow:
 322 */
 323ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 324{
 325        ktime_t res = ktime_add(lhs, rhs);
 326
 327        /*
 328         * We use KTIME_SEC_MAX here, the maximum timeout which we can
 329         * return to user space in a timespec:
 330         */
 331        if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
 332                res = ktime_set(KTIME_SEC_MAX, 0);
 333
 334        return res;
 335}
 336
 337#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 338
 339static struct debug_obj_descr hrtimer_debug_descr;
 340
 341/*
 342 * fixup_init is called when:
 343 * - an active object is initialized
 344 */
 345static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
 346{
 347        struct hrtimer *timer = addr;
 348
 349        switch (state) {
 350        case ODEBUG_STATE_ACTIVE:
 351                hrtimer_cancel(timer);
 352                debug_object_init(timer, &hrtimer_debug_descr);
 353                return 1;
 354        default:
 355                return 0;
 356        }
 357}
 358
 359/*
 360 * fixup_activate is called when:
 361 * - an active object is activated
 362 * - an unknown object is activated (might be a statically initialized object)
 363 */
 364static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
 365{
 366        switch (state) {
 367
 368        case ODEBUG_STATE_NOTAVAILABLE:
 369                WARN_ON_ONCE(1);
 370                return 0;
 371
 372        case ODEBUG_STATE_ACTIVE:
 373                WARN_ON(1);
 374
 375        default:
 376                return 0;
 377        }
 378}
 379
 380/*
 381 * fixup_free is called when:
 382 * - an active object is freed
 383 */
 384static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
 385{
 386        struct hrtimer *timer = addr;
 387
 388        switch (state) {
 389        case ODEBUG_STATE_ACTIVE:
 390                hrtimer_cancel(timer);
 391                debug_object_free(timer, &hrtimer_debug_descr);
 392                return 1;
 393        default:
 394                return 0;
 395        }
 396}
 397
 398static struct debug_obj_descr hrtimer_debug_descr = {
 399        .name           = "hrtimer",
 400        .fixup_init     = hrtimer_fixup_init,
 401        .fixup_activate = hrtimer_fixup_activate,
 402        .fixup_free     = hrtimer_fixup_free,
 403};
 404
 405static inline void debug_hrtimer_init(struct hrtimer *timer)
 406{
 407        debug_object_init(timer, &hrtimer_debug_descr);
 408}
 409
 410static inline void debug_hrtimer_activate(struct hrtimer *timer)
 411{
 412        debug_object_activate(timer, &hrtimer_debug_descr);
 413}
 414
 415static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
 416{
 417        debug_object_deactivate(timer, &hrtimer_debug_descr);
 418}
 419
 420static inline void debug_hrtimer_free(struct hrtimer *timer)
 421{
 422        debug_object_free(timer, &hrtimer_debug_descr);
 423}
 424
 425static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 426                           enum hrtimer_mode mode);
 427
 428void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 429                           enum hrtimer_mode mode)
 430{
 431        debug_object_init_on_stack(timer, &hrtimer_debug_descr);
 432        __hrtimer_init(timer, clock_id, mode);
 433}
 434
 435void destroy_hrtimer_on_stack(struct hrtimer *timer)
 436{
 437        debug_object_free(timer, &hrtimer_debug_descr);
 438}
 439
 440#else
 441static inline void debug_hrtimer_init(struct hrtimer *timer) { }
 442static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 443static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 444#endif
 445
 446/*
 447 * Check, whether the timer is on the callback pending list
 448 */
 449static inline int hrtimer_cb_pending(const struct hrtimer *timer)
 450{
 451        return timer->state & HRTIMER_STATE_PENDING;
 452}
 453
 454/*
 455 * Remove a timer from the callback pending list
 456 */
 457static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
 458{
 459        list_del_init(&timer->cb_entry);
 460}
 461
 462/* High resolution timer related functions */
 463#ifdef CONFIG_HIGH_RES_TIMERS
 464
 465/*
 466 * High resolution timer enabled ?
 467 */
 468static int hrtimer_hres_enabled __read_mostly  = 1;
 469
 470/*
 471 * Enable / Disable high resolution mode
 472 */
 473static int __init setup_hrtimer_hres(char *str)
 474{
 475        if (!strcmp(str, "off"))
 476                hrtimer_hres_enabled = 0;
 477        else if (!strcmp(str, "on"))
 478                hrtimer_hres_enabled = 1;
 479        else
 480                return 0;
 481        return 1;
 482}
 483
 484__setup("highres=", setup_hrtimer_hres);
 485
 486/*
 487 * hrtimer_high_res_enabled - query, if the highres mode is enabled
 488 */
 489static inline int hrtimer_is_hres_enabled(void)
 490{
 491        return hrtimer_hres_enabled;
 492}
 493
 494/*
 495 * Is the high resolution mode active ?
 496 */
 497static inline int hrtimer_hres_active(void)
 498{
 499        return __get_cpu_var(hrtimer_bases).hres_active;
 500}
 501
 502/*
 503 * Reprogram the event source with checking both queues for the
 504 * next event
 505 * Called with interrupts disabled and base->lock held
 506 */
 507static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 508{
 509        int i;
 510        struct hrtimer_clock_base *base = cpu_base->clock_base;
 511        ktime_t expires;
 512
 513        cpu_base->expires_next.tv64 = KTIME_MAX;
 514
 515        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 516                struct hrtimer *timer;
 517
 518                if (!base->first)
 519                        continue;
 520                timer = rb_entry(base->first, struct hrtimer, node);
 521                expires = ktime_sub(timer->expires, base->offset);
 522                if (expires.tv64 < cpu_base->expires_next.tv64)
 523                        cpu_base->expires_next = expires;
 524        }
 525
 526        if (cpu_base->expires_next.tv64 != KTIME_MAX)
 527                tick_program_event(cpu_base->expires_next, 1);
 528}
 529
 530/*
 531 * Shared reprogramming for clock_realtime and clock_monotonic
 532 *
 533 * When a timer is enqueued and expires earlier than the already enqueued
 534 * timers, we have to check, whether it expires earlier than the timer for
 535 * which the clock event device was armed.
 536 *
 537 * Called with interrupts disabled and base->cpu_base.lock held
 538 */
 539static int hrtimer_reprogram(struct hrtimer *timer,
 540                             struct hrtimer_clock_base *base)
 541{
 542        ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
 543        ktime_t expires = ktime_sub(timer->expires, base->offset);
 544        int res;
 545
 546        WARN_ON_ONCE(timer->expires.tv64 < 0);
 547
 548        /*
 549         * When the callback is running, we do not reprogram the clock event
 550         * device. The timer callback is either running on a different CPU or
 551         * the callback is executed in the hrtimer_interrupt context. The
 552         * reprogramming is handled either by the softirq, which called the
 553         * callback or at the end of the hrtimer_interrupt.
 554         */
 555        if (hrtimer_callback_running(timer))
 556                return 0;
 557
 558        /*
 559         * CLOCK_REALTIME timer might be requested with an absolute
 560         * expiry time which is less than base->offset. Nothing wrong
 561         * about that, just avoid to call into the tick code, which
 562         * has now objections against negative expiry values.
 563         */
 564        if (expires.tv64 < 0)
 565                return -ETIME;
 566
 567        if (expires.tv64 >= expires_next->tv64)
 568                return 0;
 569
 570        /*
 571         * Clockevents returns -ETIME, when the event was in the past.
 572         */
 573        res = tick_program_event(expires, 0);
 574        if (!IS_ERR_VALUE(res))
 575                *expires_next = expires;
 576        return res;
 577}
 578
 579
 580/*
 581 * Retrigger next event is called after clock was set
 582 *
 583 * Called with interrupts disabled via on_each_cpu()
 584 */
 585static void retrigger_next_event(void *arg)
 586{
 587        struct hrtimer_cpu_base *base;
 588        struct timespec realtime_offset;
 589        unsigned long seq;
 590
 591        if (!hrtimer_hres_active())
 592                return;
 593
 594        do {
 595                seq = read_seqbegin(&xtime_lock);
 596                set_normalized_timespec(&realtime_offset,
 597                                        -wall_to_monotonic.tv_sec,
 598                                        -wall_to_monotonic.tv_nsec);
 599        } while (read_seqretry(&xtime_lock, seq));
 600
 601        base = &__get_cpu_var(hrtimer_bases);
 602
 603        /* Adjust CLOCK_REALTIME offset */
 604        spin_lock(&base->lock);
 605        base->clock_base[CLOCK_REALTIME].offset =
 606                timespec_to_ktime(realtime_offset);
 607
 608        hrtimer_force_reprogram(base);
 609        spin_unlock(&base->lock);
 610}
 611
 612/*
 613 * Clock realtime was set
 614 *
 615 * Change the offset of the realtime clock vs. the monotonic
 616 * clock.
 617 *
 618 * We might have to reprogram the high resolution timer interrupt. On
 619 * SMP we call the architecture specific code to retrigger _all_ high
 620 * resolution timer interrupts. On UP we just disable interrupts and
 621 * call the high resolution interrupt code.
 622 */
 623void clock_was_set(void)
 624{
 625        /* Retrigger the CPU local events everywhere */
 626        on_each_cpu(retrigger_next_event, NULL, 0, 1);
 627}
 628
 629/*
 630 * During resume we might have to reprogram the high resolution timer
 631 * interrupt (on the local CPU):
 632 */
 633void hres_timers_resume(void)
 634{
 635        WARN_ON_ONCE(num_online_cpus() > 1);
 636
 637        /* Retrigger the CPU local events: */
 638        retrigger_next_event(NULL);
 639}
 640
 641/*
 642 * Initialize the high resolution related parts of cpu_base
 643 */
 644static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
 645{
 646        base->expires_next.tv64 = KTIME_MAX;
 647        base->hres_active = 0;
 648}
 649
 650/*
 651 * Initialize the high resolution related parts of a hrtimer
 652 */
 653static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 654{
 655}
 656
 657/*
 658 * When High resolution timers are active, try to reprogram. Note, that in case
 659 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
 660 * check happens. The timer gets enqueued into the rbtree. The reprogramming
 661 * and expiry check is done in the hrtimer_interrupt or in the softirq.
 662 */
 663static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 664                                            struct hrtimer_clock_base *base)
 665{
 666        if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 667
 668                /* Timer is expired, act upon the callback mode */
 669                switch(timer->cb_mode) {
 670                case HRTIMER_CB_IRQSAFE_NO_RESTART:
 671                        debug_hrtimer_deactivate(timer);
 672                        /*
 673                         * We can call the callback from here. No restart
 674                         * happens, so no danger of recursion
 675                         */
 676                        BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
 677                        return 1;
 678                case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
 679                        /*
 680                         * This is solely for the sched tick emulation with
 681                         * dynamic tick support to ensure that we do not
 682                         * restart the tick right on the edge and end up with
 683                         * the tick timer in the softirq ! The calling site
 684                         * takes care of this.
 685                         */
 686                        debug_hrtimer_deactivate(timer);
 687                        return 1;
 688                case HRTIMER_CB_IRQSAFE:
 689                case HRTIMER_CB_SOFTIRQ:
 690                        /*
 691                         * Move everything else into the softirq pending list !
 692                         */
 693                        list_add_tail(&timer->cb_entry,
 694                                      &base->cpu_base->cb_pending);
 695                        timer->state = HRTIMER_STATE_PENDING;
 696                        return 1;
 697                default:
 698                        BUG();
 699                }
 700        }
 701        return 0;
 702}
 703
 704/*
 705 * Switch to high resolution mode
 706 */
 707static int hrtimer_switch_to_hres(void)
 708{
 709        int cpu = smp_processor_id();
 710        struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 711        unsigned long flags;
 712
 713        if (base->hres_active)
 714                return 1;
 715
 716        local_irq_save(flags);
 717
 718        if (tick_init_highres()) {
 719                local_irq_restore(flags);
 720                printk(KERN_WARNING "Could not switch to high resolution "
 721                                    "mode on CPU %d\n", cpu);
 722                return 0;
 723        }
 724        base->hres_active = 1;
 725        base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
 726        base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
 727
 728        tick_setup_sched_timer();
 729
 730        /* "Retrigger" the interrupt to get things going */
 731        retrigger_next_event(NULL);
 732        local_irq_restore(flags);
 733        printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
 734               smp_processor_id());
 735        return 1;
 736}
 737
 738static inline void hrtimer_raise_softirq(void)
 739{
 740        raise_softirq(HRTIMER_SOFTIRQ);
 741}
 742
 743#else
 744
 745static inline int hrtimer_hres_active(void) { return 0; }
 746static inline int hrtimer_is_hres_enabled(void) { return 0; }
 747static inline int hrtimer_switch_to_hres(void) { return 0; }
 748static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 749static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 750                                            struct hrtimer_clock_base *base)
 751{
 752        return 0;
 753}
 754static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 755static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 756static inline int hrtimer_reprogram(struct hrtimer *timer,
 757                                    struct hrtimer_clock_base *base)
 758{
 759        return 0;
 760}
 761static inline void hrtimer_raise_softirq(void) { }
 762
 763#endif /* CONFIG_HIGH_RES_TIMERS */
 764
 765#ifdef CONFIG_TIMER_STATS
 766void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 767{
 768        if (timer->start_site)
 769                return;
 770
 771        timer->start_site = addr;
 772        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 773        timer->start_pid = current->pid;
 774}
 775#endif
 776
 777/*
 778 * Counterpart to lock_hrtimer_base above:
 779 */
 780static inline
 781void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 782{
 783        spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 784}
 785
 786/**
 787 * hrtimer_forward - forward the timer expiry
 788 * @timer:      hrtimer to forward
 789 * @now:        forward past this time
 790 * @interval:   the interval to forward
 791 *
 792 * Forward the timer expiry so it will expire in the future.
 793 * Returns the number of overruns.
 794 */
 795u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 796{
 797        u64 orun = 1;
 798        ktime_t delta;
 799
 800        delta = ktime_sub(now, timer->expires);
 801
 802        if (delta.tv64 < 0)
 803                return 0;
 804
 805        if (interval.tv64 < timer->base->resolution.tv64)
 806                interval.tv64 = timer->base->resolution.tv64;
 807
 808        if (unlikely(delta.tv64 >= interval.tv64)) {
 809                s64 incr = ktime_to_ns(interval);
 810
 811                orun = ktime_divns(delta, incr);
 812                timer->expires = ktime_add_ns(timer->expires, incr * orun);
 813                if (timer->expires.tv64 > now.tv64)
 814                        return orun;
 815                /*
 816                 * This (and the ktime_add() below) is the
 817                 * correction for exact:
 818                 */
 819                orun++;
 820        }
 821        timer->expires = ktime_add_safe(timer->expires, interval);
 822
 823        return orun;
 824}
 825EXPORT_SYMBOL_GPL(hrtimer_forward);
 826
 827/*
 828 * enqueue_hrtimer - internal function to (re)start a timer
 829 *
 830 * The timer is inserted in expiry order. Insertion into the
 831 * red black tree is O(log(n)). Must hold the base lock.
 832 */
 833static void enqueue_hrtimer(struct hrtimer *timer,
 834                            struct hrtimer_clock_base *base, int reprogram)
 835{
 836        struct rb_node **link = &base->active.rb_node;
 837        struct rb_node *parent = NULL;
 838        struct hrtimer *entry;
 839        int leftmost = 1;
 840
 841        debug_hrtimer_activate(timer);
 842
 843        /*
 844         * Find the right place in the rbtree:
 845         */
 846        while (*link) {
 847                parent = *link;
 848                entry = rb_entry(parent, struct hrtimer, node);
 849                /*
 850                 * We dont care about collisions. Nodes with
 851                 * the same expiry time stay together.
 852                 */
 853                if (timer->expires.tv64 < entry->expires.tv64) {
 854                        link = &(*link)->rb_left;
 855                } else {
 856                        link = &(*link)->rb_right;
 857                        leftmost = 0;
 858                }
 859        }
 860
 861        /*
 862         * Insert the timer to the rbtree and check whether it
 863         * replaces the first pending timer
 864         */
 865        if (leftmost) {
 866                /*
 867                 * Reprogram the clock event device. When the timer is already
 868                 * expired hrtimer_enqueue_reprogram has either called the
 869                 * callback or added it to the pending list and raised the
 870                 * softirq.
 871                 *
 872                 * This is a NOP for !HIGHRES
 873                 */
 874                if (reprogram && hrtimer_enqueue_reprogram(timer, base))
 875                        return;
 876
 877                base->first = &timer->node;
 878        }
 879
 880        rb_link_node(&timer->node, parent, link);
 881        rb_insert_color(&timer->node, &base->active);
 882        /*
 883         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
 884         * state of a possibly running callback.
 885         */
 886        timer->state |= HRTIMER_STATE_ENQUEUED;
 887}
 888
 889/*
 890 * __remove_hrtimer - internal function to remove a timer
 891 *
 892 * Caller must hold the base lock.
 893 *
 894 * High resolution timer mode reprograms the clock event device when the
 895 * timer is the one which expires next. The caller can disable this by setting
 896 * reprogram to zero. This is useful, when the context does a reprogramming
 897 * anyway (e.g. timer interrupt)
 898 */
 899static void __remove_hrtimer(struct hrtimer *timer,
 900                             struct hrtimer_clock_base *base,
 901                             unsigned long newstate, int reprogram)
 902{
 903        /* High res. callback list. NOP for !HIGHRES */
 904        if (hrtimer_cb_pending(timer))
 905                hrtimer_remove_cb_pending(timer);
 906        else {
 907                /*
 908                 * Remove the timer from the rbtree and replace the
 909                 * first entry pointer if necessary.
 910                 */
 911                if (base->first == &timer->node) {
 912                        base->first = rb_next(&timer->node);
 913                        /* Reprogram the clock event device. if enabled */
 914                        if (reprogram && hrtimer_hres_active())
 915                                hrtimer_force_reprogram(base->cpu_base);
 916                }
 917                rb_erase(&timer->node, &base->active);
 918        }
 919        timer->state = newstate;
 920}
 921
 922/*
 923 * remove hrtimer, called with base lock held
 924 */
 925static inline int
 926remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 927{
 928        if (hrtimer_is_queued(timer)) {
 929                int reprogram;
 930
 931                /*
 932                 * Remove the timer and force reprogramming when high
 933                 * resolution mode is active and the timer is on the current
 934                 * CPU. If we remove a timer on another CPU, reprogramming is
 935                 * skipped. The interrupt event on this CPU is fired and
 936                 * reprogramming happens in the interrupt handler. This is a
 937                 * rare case and less expensive than a smp call.
 938                 */
 939                debug_hrtimer_deactivate(timer);
 940                timer_stats_hrtimer_clear_start_info(timer);
 941                reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 942                __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
 943                                 reprogram);
 944                return 1;
 945        }
 946        return 0;
 947}
 948
 949/**
 950 * hrtimer_start - (re)start an relative timer on the current CPU
 951 * @timer:      the timer to be added
 952 * @tim:        expiry time
 953 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 954 *
 955 * Returns:
 956 *  0 on success
 957 *  1 when the timer was active
 958 */
 959int
 960hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 961{
 962        struct hrtimer_clock_base *base, *new_base;
 963        unsigned long flags;
 964        int ret, raise;
 965
 966        base = lock_hrtimer_base(timer, &flags);
 967
 968        /* Remove an active timer from the queue: */
 969        ret = remove_hrtimer(timer, base);
 970
 971        /* Switch the timer base, if necessary: */
 972        new_base = switch_hrtimer_base(timer, base);
 973
 974        if (mode == HRTIMER_MODE_REL) {
 975                tim = ktime_add_safe(tim, new_base->get_time());
 976                /*
 977                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 978                 * to signal that they simply return xtime in
 979                 * do_gettimeoffset(). In this case we want to round up by
 980                 * resolution when starting a relative timer, to avoid short
 981                 * timeouts. This will go away with the GTOD framework.
 982                 */
 983#ifdef CONFIG_TIME_LOW_RES
 984                tim = ktime_add_safe(tim, base->resolution);
 985#endif
 986        }
 987
 988        timer->expires = tim;
 989
 990        timer_stats_hrtimer_set_start_info(timer);
 991
 992        /*
 993         * Only allow reprogramming if the new base is on this CPU.
 994         * (it might still be on another CPU if the timer was pending)
 995         */
 996        enqueue_hrtimer(timer, new_base,
 997                        new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 998
 999        /*
1000         * The timer may be expired and moved to the cb_pending
1001         * list. We can not raise the softirq with base lock held due
1002         * to a possible deadlock with runqueue lock.
1003         */
1004        raise = timer->state == HRTIMER_STATE_PENDING;
1005
1006        /*
1007         * We use preempt_disable to prevent this task from migrating after
1008         * setting up the softirq and raising it. Otherwise, if me migrate
1009         * we will raise the softirq on the wrong CPU.
1010         */
1011        preempt_disable();
1012
1013        unlock_hrtimer_base(timer, &flags);
1014
1015        if (raise)
1016                hrtimer_raise_softirq();
1017        preempt_enable();
1018
1019        return ret;
1020}
1021EXPORT_SYMBOL_GPL(hrtimer_start);
1022
/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:      hrtimer to stop
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
        struct hrtimer_clock_base *base;
        unsigned long flags;
        int ret;

        base = lock_hrtimer_base(timer, &flags);

        /* A timer whose callback is running can not be dequeued here */
        if (hrtimer_callback_running(timer))
                ret = -1;
        else
                ret = remove_hrtimer(timer, base);

        unlock_hrtimer_base(timer, &flags);

        return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1050
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:      the timer to be cancelled
 *
 * Retries hrtimer_try_to_cancel() until it no longer reports a running
 * callback.
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
        int ret;

        /* A negative result means the callback is running: spin and retry */
        while ((ret = hrtimer_try_to_cancel(timer)) < 0)
                cpu_relax();

        return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
1070
1071/**
1072 * hrtimer_get_remaining - get remaining time for the timer
1073 * @timer:      the timer to read
1074 */
1075ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1076{
1077        struct hrtimer_clock_base *base;
1078        unsigned long flags;
1079        ktime_t rem;
1080
1081        base = lock_hrtimer_base(timer, &flags);
1082        rem = ktime_sub(timer->expires, base->get_time());
1083        unlock_hrtimer_base(timer, &flags);
1084
1085        return rem;
1086}
1087EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1088
#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
/**
 * hrtimer_get_next_event - get the time until next expiry event
 *
 * Returns the delta to the next expiry event of the current CPU, clamped
 * to zero if that event is already in the past. KTIME_MAX is returned if
 * no timer is queued or if high resolution mode is active (the clock
 * bases are not scanned in that case).
 */
ktime_t hrtimer_get_next_event(void)
{
        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
        ktime_t mindelta = { .tv64 = KTIME_MAX };
        unsigned long flags;
        int i;

        spin_lock_irqsave(&cpu_base->lock, flags);

        if (!hrtimer_hres_active()) {
                for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                        struct hrtimer_clock_base *base =
                                &cpu_base->clock_base[i];
                        struct hrtimer *timer;
                        ktime_t delta;

                        /* Nothing queued on this clock base */
                        if (!base->first)
                                continue;

                        timer = rb_entry(base->first, struct hrtimer, node);
                        delta = ktime_sub(timer->expires, base->get_time());
                        if (delta.tv64 < mindelta.tv64)
                                mindelta.tv64 = delta.tv64;
                }
        }

        spin_unlock_irqrestore(&cpu_base->lock, flags);

        /* An already expired timer is reported as "now" */
        if (mindelta.tv64 < 0)
                mindelta.tv64 = 0;
        return mindelta;
}
#endif
1128
1129static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1130                           enum hrtimer_mode mode)
1131{
1132        struct hrtimer_cpu_base *cpu_base;
1133
1134        memset(timer, 0, sizeof(struct hrtimer));
1135
1136        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1137
1138        if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1139                clock_id = CLOCK_MONOTONIC;
1140
1141        timer->base = &cpu_base->clock_base[clock_id];
1142        INIT_LIST_HEAD(&timer->cb_entry);
1143        hrtimer_init_timer_hres(timer);
1144
1145#ifdef CONFIG_TIMER_STATS
1146        timer->start_site = NULL;
1147        timer->start_pid = -1;
1148        memset(timer->start_comm, 0, TASK_COMM_LEN);
1149#endif
1150}
1151
1152/**
1153 * hrtimer_init - initialize a timer to the given clock
1154 * @timer:      the timer to be initialized
1155 * @clock_id:   the clock to be used
1156 * @mode:       timer mode abs/rel
1157 */
1158void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1159                  enum hrtimer_mode mode)
1160{
1161        debug_hrtimer_init(timer);
1162        __hrtimer_init(timer, clock_id, mode);
1163}
1164EXPORT_SYMBOL_GPL(hrtimer_init);
1165
1166/**
1167 * hrtimer_get_res - get the timer resolution for a clock
1168 * @which_clock: which clock to query
1169 * @tp:          pointer to timespec variable to store the resolution
1170 *
1171 * Store the resolution of the clock selected by @which_clock in the
1172 * variable pointed to by @tp.
1173 */
1174int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1175{
1176        struct hrtimer_cpu_base *cpu_base;
1177
1178        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1179        *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
1180
1181        return 0;
1182}
1183EXPORT_SYMBOL_GPL(hrtimer_get_res);
1184
/*
 * Run the callbacks of all timers queued on the cb_pending list of
 * @cpu_base.
 *
 * The cpu_base lock is taken with interrupts disabled to manipulate the
 * list, and is dropped (interrupts enabled again) around each callback
 * invocation.
 */
static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
{
        spin_lock_irq(&cpu_base->lock);

        while (!list_empty(&cpu_base->cb_pending)) {
                enum hrtimer_restart (*fn)(struct hrtimer *);
                struct hrtimer *timer;
                int restart;

                /* Always process the head of the pending list */
                timer = list_entry(cpu_base->cb_pending.next,
                                   struct hrtimer, cb_entry);

                debug_hrtimer_deactivate(timer);
                timer_stats_account_hrtimer(timer);

                /*
                 * Fetch the callback and take the timer off the pending
                 * list, marking it as "callback running", before the lock
                 * is dropped.
                 */
                fn = timer->function;
                __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
                spin_unlock_irq(&cpu_base->lock);

                restart = fn(timer);

                spin_lock_irq(&cpu_base->lock);

                timer->state &= ~HRTIMER_STATE_CALLBACK;
                if (restart == HRTIMER_RESTART) {
                        /*
                         * A callback which asks for a restart must not
                         * have requeued the timer itself.
                         */
                        BUG_ON(hrtimer_active(timer));
                        /*
                         * Enqueue the timer, allow reprogramming of the event
                         * device
                         */
                        enqueue_hrtimer(timer, timer->base, 1);
                } else if (hrtimer_active(timer)) {
                        /*
                         * If the timer was rearmed on another CPU, reprogram
                         * the event device.
                         */
                        struct hrtimer_clock_base *base = timer->base;

                        if (base->first == &timer->node &&
                            hrtimer_reprogram(timer, base)) {
                                /*
                                 * Timer is expired. Thus move it from tree to
                                 * pending list again.
                                 */
                                __remove_hrtimer(timer, base,
                                                 HRTIMER_STATE_PENDING, 0);
                                list_add_tail(&timer->cb_entry,
                                              &base->cpu_base->cb_pending);
                        }
                }
        }
        spin_unlock_irq(&cpu_base->lock);
}
1238
/*
 * Dequeue @timer, run its callback and requeue it when the callback asks
 * for a restart.
 *
 * The callers in this file invoke this with cpu_base->lock held. The lock
 * is only dropped around the callback for HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
 * timers; all other callbacks run with the lock held.
 */
static void __run_hrtimer(struct hrtimer *timer)
{
        struct hrtimer_clock_base *base = timer->base;
        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
        enum hrtimer_restart (*fn)(struct hrtimer *);
        int restart;

        debug_hrtimer_deactivate(timer);
        /* Mark the timer as "callback running"; no reprogramming here */
        __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
        timer_stats_account_hrtimer(timer);

        fn = timer->function;
        if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
                /*
                 * Used for scheduler timers, avoid lock inversion with
                 * rq->lock and tasklist_lock.
                 *
                 * These timers are required to deal with enqueue expiry
                 * themselves and are not allowed to migrate.
                 */
                spin_unlock(&cpu_base->lock);
                restart = fn(timer);
                spin_lock(&cpu_base->lock);
        } else
                restart = fn(timer);

        /*
         * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
         * reprogramming of the event hardware. This happens at the end of this
         * function anyway.
         */
        if (restart != HRTIMER_NORESTART) {
                /* The callback must not have changed the timer state */
                BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
                enqueue_hrtimer(timer, base, 0);
        }
        timer->state &= ~HRTIMER_STATE_CALLBACK;
}
1276
1277#ifdef CONFIG_HIGH_RES_TIMERS
1278
/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 *
 * Walks all clock bases of the current CPU and expires the timers which
 * are due: HRTIMER_CB_SOFTIRQ timers are moved to the cb_pending list for
 * the softirq, all others are run right here via __run_hrtimer(). After
 * that the event device is programmed for the earliest remaining expiry.
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
        struct hrtimer_clock_base *base;
        ktime_t expires_next, now;
        int i, raise = 0;

        BUG_ON(!cpu_base->hres_active);
        cpu_base->nr_events++;
        dev->next_event.tv64 = KTIME_MAX;

 retry:
        now = ktime_get();

        /* KTIME_MAX means: no further timer found (yet) */
        expires_next.tv64 = KTIME_MAX;

        base = cpu_base->clock_base;

        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                ktime_t basenow;
                struct rb_node *node;

                spin_lock(&cpu_base->lock);

                /* Current time in the time domain of this clock base */
                basenow = ktime_add(now, base->offset);

                while ((node = base->first)) {
                        struct hrtimer *timer;

                        timer = rb_entry(node, struct hrtimer, node);

                        if (basenow.tv64 < timer->expires.tv64) {
                                ktime_t expires;

                                /*
                                 * First timer of this base which is not
                                 * yet due: remember its expiry, with the
                                 * base offset subtracted again so it can
                                 * be compared across bases, and stop
                                 * scanning this base.
                                 */
                                expires = ktime_sub(timer->expires,
                                                    base->offset);
                                if (expires.tv64 < expires_next.tv64)
                                        expires_next = expires;
                                break;
                        }

                        /* Move softirq callbacks to the pending list */
                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
                                __remove_hrtimer(timer, base,
                                                 HRTIMER_STATE_PENDING, 0);
                                list_add_tail(&timer->cb_entry,
                                              &base->cpu_base->cb_pending);
                                raise = 1;
                                continue;
                        }

                        __run_hrtimer(timer);
                }
                spin_unlock(&cpu_base->lock);
                base++;
        }

        cpu_base->expires_next = expires_next;

        /* Reprogramming necessary ? */
        if (expires_next.tv64 != KTIME_MAX) {
                /*
                 * A non-zero return from tick_program_event() sends us
                 * back to rescan the bases with a fresh timestamp.
                 */
                if (tick_program_event(expires_next, 0))
                        goto retry;
        }

        /* Raise softirq ? */
        if (raise)
                raise_softirq(HRTIMER_SOFTIRQ);
}
1352
1353static void run_hrtimer_softirq(struct softirq_action *h)
1354{
1355        run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
1356}
1357
1358#endif  /* CONFIG_HIGH_RES_TIMERS */
1359
1360/*
1361 * Called from timer softirq every jiffy, expire hrtimers:
1362 *
1363 * For HRT its the fall back code to run the softirq in the timer
1364 * softirq context in case the hrtimer initialization failed or has
1365 * not been done yet.
1366 */
1367void hrtimer_run_pending(void)
1368{
1369        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1370
1371        if (hrtimer_hres_active())
1372                return;
1373
1374        /*
1375         * This _is_ ugly: We have to check in the softirq context,
1376         * whether we can switch to highres and / or nohz mode. The
1377         * clocksource switch happens in the timer interrupt with
1378         * xtime_lock held. Notification from there only sets the
1379         * check bit in the tick_oneshot code, otherwise we might
1380         * deadlock vs. xtime_lock.
1381         */
1382        if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1383                hrtimer_switch_to_hres();
1384
1385        run_hrtimer_pending(cpu_base);
1386}
1387
1388/*
1389 * Called from hardirq context every jiffy
1390 */
1391void hrtimer_run_queues(void)
1392{
1393        struct rb_node *node;
1394        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1395        struct hrtimer_clock_base *base;
1396        int index, gettime = 1;
1397
1398        if (hrtimer_hres_active())
1399                return;
1400
1401        for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1402                base = &cpu_base->clock_base[index];
1403
1404                if (!base->first)
1405                        continue;
1406
1407                if (base->get_softirq_time)
1408                        base->softirq_time = base->get_softirq_time();
1409                else if (gettime) {
1410                        hrtimer_get_softirq_time(cpu_base);
1411                        gettime = 0;
1412                }
1413
1414                spin_lock(&cpu_base->lock);
1415
1416                while ((node = base->first)) {
1417                        struct hrtimer *timer;
1418
1419                        timer = rb_entry(node, struct hrtimer, node);
1420                        if (base->softirq_time.tv64 <= timer->expires.tv64)
1421                                break;
1422
1423                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1424                                __remove_hrtimer(timer, base,
1425                                        HRTIMER_STATE_PENDING, 0);
1426                                list_add_tail(&timer->cb_entry,
1427                                        &base->cpu_base->cb_pending);
1428                                continue;
1429                        }
1430
1431                        __run_hrtimer(timer);
1432                }
1433                spin_unlock(&cpu_base->lock);
1434        }
1435}
1436
1437/*
1438 * Sleep related functions:
1439 */
1440static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1441{
1442        struct hrtimer_sleeper *t =
1443                container_of(timer, struct hrtimer_sleeper, timer);
1444        struct task_struct *task = t->task;
1445
1446        t->task = NULL;
1447        if (task)
1448                wake_up_process(task);
1449
1450        return HRTIMER_NORESTART;
1451}
1452
1453void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1454{
1455        sl->timer.function = hrtimer_wakeup;
1456        sl->task = task;
1457#ifdef CONFIG_HIGH_RES_TIMERS
1458        sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1459#endif
1460}
1461
/*
 * Put the current task to sleep until the timer of @t has fired or a
 * signal is pending. t->timer.expires must have been set by the caller.
 *
 * Returns non-zero when t->task has been cleared, i.e. the timer fired
 * (or was found inactive right after being started), and 0 when the loop
 * was left because of a pending signal.
 */
static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
        hrtimer_init_sleeper(t, current);

        do {
                /* Set the task state before arming the timer */
                set_current_state(TASK_INTERRUPTIBLE);
                hrtimer_start(&t->timer, t->timer.expires, mode);
                /* Timer not active after start: treat it as fired */
                if (!hrtimer_active(&t->timer))
                        t->task = NULL;

                if (likely(t->task))
                        schedule();

                hrtimer_cancel(&t->timer);
                /*
                 * hrtimer_start() stored an absolute expiry time in the
                 * timer, so any further iteration must not add the current
                 * time again.
                 */
                mode = HRTIMER_MODE_ABS;

        } while (t->task && !signal_pending(current));

        __set_current_state(TASK_RUNNING);

        return t->task == NULL;
}
1484
1485static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1486{
1487        struct timespec rmt;
1488        ktime_t rem;
1489
1490        rem = ktime_sub(timer->expires, timer->base->get_time());
1491        if (rem.tv64 <= 0)
1492                return 0;
1493        rmt = ktime_to_timespec(rem);
1494
1495        if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1496                return -EFAULT;
1497
1498        return 1;
1499}
1500
1501long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1502{
1503        struct hrtimer_sleeper t;
1504        struct timespec __user  *rmtp;
1505        int ret = 0;
1506
1507        hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
1508                                HRTIMER_MODE_ABS);
1509        t.timer.expires.tv64 = restart->nanosleep.expires;
1510
1511        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1512                goto out;
1513
1514        rmtp = restart->nanosleep.rmtp;
1515        if (rmtp) {
1516                ret = update_rmtp(&t.timer, rmtp);
1517                if (ret <= 0)
1518                        goto out;
1519        }
1520
1521        /* The other values in restart are already filled in */
1522        ret = -ERESTART_RESTARTBLOCK;
1523out:
1524        destroy_hrtimer_on_stack(&t.timer);
1525        return ret;
1526}
1527
1528long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1529                       const enum hrtimer_mode mode, const clockid_t clockid)
1530{
1531        struct restart_block *restart;
1532        struct hrtimer_sleeper t;
1533        int ret = 0;
1534
1535        hrtimer_init_on_stack(&t.timer, clockid, mode);
1536        t.timer.expires = timespec_to_ktime(*rqtp);
1537        if (do_nanosleep(&t, mode))
1538                goto out;
1539
1540        /* Absolute timers do not update the rmtp value and restart: */
1541        if (mode == HRTIMER_MODE_ABS) {
1542                ret = -ERESTARTNOHAND;
1543                goto out;
1544        }
1545
1546        if (rmtp) {
1547                ret = update_rmtp(&t.timer, rmtp);
1548                if (ret <= 0)
1549                        goto out;
1550        }
1551
1552        restart = &current_thread_info()->restart_block;
1553        restart->fn = hrtimer_nanosleep_restart;
1554        restart->nanosleep.index = t.timer.base->index;
1555        restart->nanosleep.rmtp = rmtp;
1556        restart->nanosleep.expires = t.timer.expires.tv64;
1557
1558        ret = -ERESTART_RESTARTBLOCK;
1559out:
1560        destroy_hrtimer_on_stack(&t.timer);
1561        return ret;
1562}
1563
1564asmlinkage long
1565sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
1566{
1567        struct timespec tu;
1568
1569        if (copy_from_user(&tu, rqtp, sizeof(tu)))
1570                return -EFAULT;
1571
1572        if (!timespec_valid(&tu))
1573                return -EINVAL;
1574
1575        return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1576}
1577
1578/*
1579 * Functions related to boot-time initialization:
1580 */
1581static void __cpuinit init_hrtimers_cpu(int cpu)
1582{
1583        struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1584        int i;
1585
1586        spin_lock_init(&cpu_base->lock);
1587
1588        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1589                cpu_base->clock_base[i].cpu_base = cpu_base;
1590
1591        INIT_LIST_HEAD(&cpu_base->cb_pending);
1592        hrtimer_init_hres(cpu_base);
1593}
1594
1595#ifdef CONFIG_HOTPLUG_CPU
1596
1597static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1598                                struct hrtimer_clock_base *new_base)
1599{
1600        struct hrtimer *timer;
1601        struct rb_node *node;
1602
1603        while ((node = rb_first(&old_base->active))) {
1604                timer = rb_entry(node, struct hrtimer, node);
1605                BUG_ON(hrtimer_callback_running(timer));
1606                debug_hrtimer_deactivate(timer);
1607                __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
1608                timer->base = new_base;
1609                /*
1610                 * Enqueue the timer. Allow reprogramming of the event device
1611                 */
1612                enqueue_hrtimer(timer, new_base, 1);
1613        }
1614}
1615
1616static void migrate_hrtimers(int cpu)
1617{
1618        struct hrtimer_cpu_base *old_base, *new_base;
1619        int i;
1620
1621        BUG_ON(cpu_online(cpu));
1622        old_base = &per_cpu(hrtimer_bases, cpu);
1623        new_base = &get_cpu_var(hrtimer_bases);
1624
1625        tick_cancel_sched_timer(cpu);
1626
1627        local_irq_disable();
1628        spin_lock(&new_base->lock);
1629        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1630
1631        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1632                migrate_hrtimer_list(&old_base->clock_base[i],
1633                                     &new_base->clock_base[i]);
1634        }
1635
1636        spin_unlock(&old_base->lock);
1637        spin_unlock(&new_base->lock);
1638        local_irq_enable();
1639        put_cpu_var(hrtimer_bases);
1640}
1641#endif /* CONFIG_HOTPLUG_CPU */
1642
1643static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1644                                        unsigned long action, void *hcpu)
1645{
1646        unsigned int cpu = (long)hcpu;
1647
1648        switch (action) {
1649
1650        case CPU_UP_PREPARE:
1651        case CPU_UP_PREPARE_FROZEN:
1652                init_hrtimers_cpu(cpu);
1653                break;
1654
1655#ifdef CONFIG_HOTPLUG_CPU
1656        case CPU_DEAD:
1657        case CPU_DEAD_FROZEN:
1658                clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
1659                migrate_hrtimers(cpu);
1660                break;
1661#endif
1662
1663        default:
1664                break;
1665        }
1666
1667        return NOTIFY_OK;
1668}
1669
/* CPU notifier block which routes CPU hotplug events to hrtimer_cpu_notify() */
static struct notifier_block __cpuinitdata hrtimers_nb = {
        .notifier_call = hrtimer_cpu_notify,
};
1673
1674void __init hrtimers_init(void)
1675{
1676        hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1677                          (void *)(long)smp_processor_id());
1678        register_cpu_notifier(&hrtimers_nb);
1679#ifdef CONFIG_HIGH_RES_TIMERS
1680        open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
1681#endif
1682}
1683
1684
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.