linux/kernel/hrtimer.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/hrtimer.c
   3 *
   4 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   5 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   6 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
   7 *
   8 *  High-resolution kernel timers
   9 *
  10 *  In contrast to the low-resolution timeout API implemented in
  11 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
  12 *  depending on system configuration and capabilities.
  13 *
  14 *  These timers are currently used for:
  15 *   - itimers
  16 *   - POSIX timers
  17 *   - nanosleep
  18 *   - precise in-kernel timing
  19 *
  20 *  Started by: Thomas Gleixner and Ingo Molnar
  21 *
  22 *  Credits:
  23 *      based on kernel/timer.c
  24 *
  25 *      Help, testing, suggestions, bugfixes, improvements were
  26 *      provided by:
  27 *
  28 *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
   29 *      et al.
  30 *
  31 *  For licencing details see kernel-base/COPYING
  32 */
  33
  34#include <linux/cpu.h>
  35#include <linux/irq.h>
  36#include <linux/module.h>
  37#include <linux/percpu.h>
  38#include <linux/hrtimer.h>
  39#include <linux/notifier.h>
  40#include <linux/syscalls.h>
  41#include <linux/kallsyms.h>
  42#include <linux/interrupt.h>
  43#include <linux/tick.h>
  44#include <linux/seq_file.h>
  45#include <linux/err.h>
  46#include <linux/debugobjects.h>
  47
  48#include <asm/uaccess.h>
  49
  50/**
  51 * ktime_get - get the monotonic time in ktime_t format
  52 *
  53 * returns the time in ktime_t format
  54 */
  55ktime_t ktime_get(void)
  56{
  57        struct timespec now;
  58
  59        ktime_get_ts(&now);
  60
  61        return timespec_to_ktime(now);
  62}
  63EXPORT_SYMBOL_GPL(ktime_get);
  64
  65/**
  66 * ktime_get_real - get the real (wall-) time in ktime_t format
  67 *
  68 * returns the time in ktime_t format
  69 */
  70ktime_t ktime_get_real(void)
  71{
  72        struct timespec now;
  73
  74        getnstimeofday(&now);
  75
  76        return timespec_to_ktime(now);
  77}
  78
  79EXPORT_SYMBOL_GPL(ktime_get_real);
  80
  81/*
  82 * The timer bases:
  83 *
  84 * Note: If we want to add new timer bases, we have to skip the two
  85 * clock ids captured by the cpu-timers. We do this by holding empty
  86 * entries rather than doing math adjustment of the clock ids.
  87 * This ensures that we capture erroneous accesses to these clock ids
  88 * rather than moving them into the range of valid clock id's.
  89 */
  90DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
  91{
  92
  93        .clock_base =
  94        {
  95                {
  96                        .index = CLOCK_REALTIME,
  97                        .get_time = &ktime_get_real,
  98                        .resolution = KTIME_LOW_RES,
  99                },
 100                {
 101                        .index = CLOCK_MONOTONIC,
 102                        .get_time = &ktime_get,
 103                        .resolution = KTIME_LOW_RES,
 104                },
 105        }
 106};
 107
 108/**
 109 * ktime_get_ts - get the monotonic clock in timespec format
 110 * @ts:         pointer to timespec variable
 111 *
 112 * The function calculates the monotonic clock from the realtime
 113 * clock and the wall_to_monotonic offset and stores the result
 114 * in normalized timespec format in the variable pointed to by @ts.
 115 */
 116void ktime_get_ts(struct timespec *ts)
 117{
 118        struct timespec tomono;
 119        unsigned long seq;
 120
 121        do {
 122                seq = read_seqbegin(&xtime_lock);
 123                getnstimeofday(ts);
 124                tomono = wall_to_monotonic;
 125
 126        } while (read_seqretry(&xtime_lock, seq));
 127
 128        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
 129                                ts->tv_nsec + tomono.tv_nsec);
 130}
 131EXPORT_SYMBOL_GPL(ktime_get_ts);
 132
 133/*
 134 * Get the coarse grained time at the softirq based on xtime and
 135 * wall_to_monotonic.
 136 */
 137static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 138{
 139        ktime_t xtim, tomono;
 140        struct timespec xts, tom;
 141        unsigned long seq;
 142
 143        do {
 144                seq = read_seqbegin(&xtime_lock);
 145                xts = current_kernel_time();
 146                tom = wall_to_monotonic;
 147        } while (read_seqretry(&xtime_lock, seq));
 148
 149        xtim = timespec_to_ktime(xts);
 150        tomono = timespec_to_ktime(tom);
 151        base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 152        base->clock_base[CLOCK_MONOTONIC].softirq_time =
 153                ktime_add(xtim, tomono);
 154}
 155
 156/*
 157 * Functions and macros which are different for UP/SMP systems are kept in a
 158 * single place
 159 */
 160#ifdef CONFIG_SMP
 161
 162/*
 163 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 164 * means that all timers which are tied to this base via timer->base are
 165 * locked, and the base itself is locked too.
 166 *
 167 * So __run_timers/migrate_timers can safely modify all timers which could
 168 * be found on the lists/queues.
 169 *
 170 * When the timer's base is locked, and the timer removed from list, it is
 171 * possible to set timer->base = NULL and drop the lock: the timer remains
 172 * locked.
 173 */
 174static
 175struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 176                                             unsigned long *flags)
 177{
 178        struct hrtimer_clock_base *base;
 179
 180        for (;;) {
 181                base = timer->base;
 182                if (likely(base != NULL)) {
 183                        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 184                        if (likely(base == timer->base))
 185                                return base;
 186                        /* The timer has migrated to another CPU: */
 187                        spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 188                }
 189                cpu_relax();
 190        }
 191}
 192
 193/*
 194 * Switch the timer base to the current CPU when possible.
 195 */
 196static inline struct hrtimer_clock_base *
 197switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 198{
 199        struct hrtimer_clock_base *new_base;
 200        struct hrtimer_cpu_base *new_cpu_base;
 201
 202        new_cpu_base = &__get_cpu_var(hrtimer_bases);
 203        new_base = &new_cpu_base->clock_base[base->index];
 204
 205        if (base != new_base) {
 206                /*
 207                 * We are trying to schedule the timer on the local CPU.
 208                 * However we can't change timer's base while it is running,
 209                 * so we keep it on the same CPU. No hassle vs. reprogramming
 210                 * the event source in the high resolution case. The softirq
 211                 * code will take care of this when the timer function has
 212                 * completed. There is no conflict as we hold the lock until
 213                 * the timer is enqueued.
 214                 */
 215                if (unlikely(hrtimer_callback_running(timer)))
 216                        return base;
 217
 218                /* See the comment in lock_timer_base() */
 219                timer->base = NULL;
 220                spin_unlock(&base->cpu_base->lock);
 221                spin_lock(&new_base->cpu_base->lock);
 222                timer->base = new_base;
 223        }
 224        return new_base;
 225}
 226
 227#else /* CONFIG_SMP */
 228
 229static inline struct hrtimer_clock_base *
 230lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 231{
 232        struct hrtimer_clock_base *base = timer->base;
 233
 234        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 235
 236        return base;
 237}
 238
 239# define switch_hrtimer_base(t, b)      (b)
 240
 241#endif  /* !CONFIG_SMP */
 242
 243/*
 244 * Functions for the union type storage format of ktime_t which are
 245 * too large for inlining:
 246 */
 247#if BITS_PER_LONG < 64
 248# ifndef CONFIG_KTIME_SCALAR
 249/**
 250 * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
 251 * @kt:         addend
 252 * @nsec:       the scalar nsec value to add
 253 *
 254 * Returns the sum of kt and nsec in ktime_t format
 255 */
 256ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 257{
 258        ktime_t tmp;
 259
 260        if (likely(nsec < NSEC_PER_SEC)) {
 261                tmp.tv64 = nsec;
 262        } else {
 263                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 264
 265                tmp = ktime_set((long)nsec, rem);
 266        }
 267
 268        return ktime_add(kt, tmp);
 269}
 270
 271EXPORT_SYMBOL_GPL(ktime_add_ns);
 272
 273/**
 274 * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
 275 * @kt:         minuend
 276 * @nsec:       the scalar nsec value to subtract
 277 *
 278 * Returns the subtraction of @nsec from @kt in ktime_t format
 279 */
 280ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
 281{
 282        ktime_t tmp;
 283
 284        if (likely(nsec < NSEC_PER_SEC)) {
 285                tmp.tv64 = nsec;
 286        } else {
 287                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 288
 289                tmp = ktime_set((long)nsec, rem);
 290        }
 291
 292        return ktime_sub(kt, tmp);
 293}
 294
 295EXPORT_SYMBOL_GPL(ktime_sub_ns);
 296# endif /* !CONFIG_KTIME_SCALAR */
 297
 298/*
 299 * Divide a ktime value by a nanosecond value
 300 */
 301u64 ktime_divns(const ktime_t kt, s64 div)
 302{
 303        u64 dclc;
 304        int sft = 0;
 305
 306        dclc = ktime_to_ns(kt);
 307        /* Make sure the divisor is less than 2^32: */
 308        while (div >> 32) {
 309                sft++;
 310                div >>= 1;
 311        }
 312        dclc >>= sft;
 313        do_div(dclc, (unsigned long) div);
 314
 315        return dclc;
 316}
  317#endif /* BITS_PER_LONG < 64 */
 318
 319/*
 320 * Add two ktime values and do a safety check for overflow:
 321 */
 322ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 323{
 324        ktime_t res = ktime_add(lhs, rhs);
 325
 326        /*
 327         * We use KTIME_SEC_MAX here, the maximum timeout which we can
 328         * return to user space in a timespec:
 329         */
 330        if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
 331                res = ktime_set(KTIME_SEC_MAX, 0);
 332
 333        return res;
 334}
 335
 336#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 337
 338static struct debug_obj_descr hrtimer_debug_descr;
 339
 340/*
 341 * fixup_init is called when:
 342 * - an active object is initialized
 343 */
 344static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
 345{
 346        struct hrtimer *timer = addr;
 347
 348        switch (state) {
 349        case ODEBUG_STATE_ACTIVE:
 350                hrtimer_cancel(timer);
 351                debug_object_init(timer, &hrtimer_debug_descr);
 352                return 1;
 353        default:
 354                return 0;
 355        }
 356}
 357
 358/*
 359 * fixup_activate is called when:
 360 * - an active object is activated
 361 * - an unknown object is activated (might be a statically initialized object)
 362 */
 363static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
 364{
 365        switch (state) {
 366
 367        case ODEBUG_STATE_NOTAVAILABLE:
 368                WARN_ON_ONCE(1);
 369                return 0;
 370
 371        case ODEBUG_STATE_ACTIVE:
 372                WARN_ON(1);
 373
 374        default:
 375                return 0;
 376        }
 377}
 378
 379/*
 380 * fixup_free is called when:
 381 * - an active object is freed
 382 */
 383static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
 384{
 385        struct hrtimer *timer = addr;
 386
 387        switch (state) {
 388        case ODEBUG_STATE_ACTIVE:
 389                hrtimer_cancel(timer);
 390                debug_object_free(timer, &hrtimer_debug_descr);
 391                return 1;
 392        default:
 393                return 0;
 394        }
 395}
 396
 397static struct debug_obj_descr hrtimer_debug_descr = {
 398        .name           = "hrtimer",
 399        .fixup_init     = hrtimer_fixup_init,
 400        .fixup_activate = hrtimer_fixup_activate,
 401        .fixup_free     = hrtimer_fixup_free,
 402};
 403
 404static inline void debug_hrtimer_init(struct hrtimer *timer)
 405{
 406        debug_object_init(timer, &hrtimer_debug_descr);
 407}
 408
 409static inline void debug_hrtimer_activate(struct hrtimer *timer)
 410{
 411        debug_object_activate(timer, &hrtimer_debug_descr);
 412}
 413
 414static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
 415{
 416        debug_object_deactivate(timer, &hrtimer_debug_descr);
 417}
 418
 419static inline void debug_hrtimer_free(struct hrtimer *timer)
 420{
 421        debug_object_free(timer, &hrtimer_debug_descr);
 422}
 423
 424static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 425                           enum hrtimer_mode mode);
 426
 427void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 428                           enum hrtimer_mode mode)
 429{
 430        debug_object_init_on_stack(timer, &hrtimer_debug_descr);
 431        __hrtimer_init(timer, clock_id, mode);
 432}
 433
 434void destroy_hrtimer_on_stack(struct hrtimer *timer)
 435{
 436        debug_object_free(timer, &hrtimer_debug_descr);
 437}
 438
 439#else
 440static inline void debug_hrtimer_init(struct hrtimer *timer) { }
 441static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 442static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 443#endif
 444
 445/*
 446 * Check, whether the timer is on the callback pending list
 447 */
 448static inline int hrtimer_cb_pending(const struct hrtimer *timer)
 449{
 450        return timer->state & HRTIMER_STATE_PENDING;
 451}
 452
 453/*
 454 * Remove a timer from the callback pending list
 455 */
 456static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
 457{
 458        list_del_init(&timer->cb_entry);
 459}
 460
 461/* High resolution timer related functions */
 462#ifdef CONFIG_HIGH_RES_TIMERS
 463
 464/*
 465 * High resolution timer enabled ?
 466 */
 467static int hrtimer_hres_enabled __read_mostly  = 1;
 468
 469/*
 470 * Enable / Disable high resolution mode
 471 */
 472static int __init setup_hrtimer_hres(char *str)
 473{
 474        if (!strcmp(str, "off"))
 475                hrtimer_hres_enabled = 0;
 476        else if (!strcmp(str, "on"))
 477                hrtimer_hres_enabled = 1;
 478        else
 479                return 0;
 480        return 1;
 481}
 482
 483__setup("highres=", setup_hrtimer_hres);
 484
 485/*
 486 * hrtimer_high_res_enabled - query, if the highres mode is enabled
 487 */
 488static inline int hrtimer_is_hres_enabled(void)
 489{
 490        return hrtimer_hres_enabled;
 491}
 492
 493/*
 494 * Is the high resolution mode active ?
 495 */
 496static inline int hrtimer_hres_active(void)
 497{
 498        return __get_cpu_var(hrtimer_bases).hres_active;
 499}
 500
 501/*
 502 * Reprogram the event source with checking both queues for the
 503 * next event
 504 * Called with interrupts disabled and base->lock held
 505 */
 506static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 507{
 508        int i;
 509        struct hrtimer_clock_base *base = cpu_base->clock_base;
 510        ktime_t expires;
 511
 512        cpu_base->expires_next.tv64 = KTIME_MAX;
 513
 514        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 515                struct hrtimer *timer;
 516
 517                if (!base->first)
 518                        continue;
 519                timer = rb_entry(base->first, struct hrtimer, node);
 520                expires = ktime_sub(timer->expires, base->offset);
 521                if (expires.tv64 < cpu_base->expires_next.tv64)
 522                        cpu_base->expires_next = expires;
 523        }
 524
 525        if (cpu_base->expires_next.tv64 != KTIME_MAX)
 526                tick_program_event(cpu_base->expires_next, 1);
 527}
 528
 529/*
 530 * Shared reprogramming for clock_realtime and clock_monotonic
 531 *
 532 * When a timer is enqueued and expires earlier than the already enqueued
 533 * timers, we have to check, whether it expires earlier than the timer for
 534 * which the clock event device was armed.
 535 *
 536 * Called with interrupts disabled and base->cpu_base.lock held
 537 */
 538static int hrtimer_reprogram(struct hrtimer *timer,
 539                             struct hrtimer_clock_base *base)
 540{
 541        ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
 542        ktime_t expires = ktime_sub(timer->expires, base->offset);
 543        int res;
 544
 545        WARN_ON_ONCE(timer->expires.tv64 < 0);
 546
 547        /*
 548         * When the callback is running, we do not reprogram the clock event
 549         * device. The timer callback is either running on a different CPU or
 550         * the callback is executed in the hrtimer_interrupt context. The
 551         * reprogramming is handled either by the softirq, which called the
 552         * callback or at the end of the hrtimer_interrupt.
 553         */
 554        if (hrtimer_callback_running(timer))
 555                return 0;
 556
 557        /*
 558         * CLOCK_REALTIME timer might be requested with an absolute
 559         * expiry time which is less than base->offset. Nothing wrong
 560         * about that, just avoid to call into the tick code, which
 561         * has now objections against negative expiry values.
 562         */
 563        if (expires.tv64 < 0)
 564                return -ETIME;
 565
 566        if (expires.tv64 >= expires_next->tv64)
 567                return 0;
 568
 569        /*
 570         * Clockevents returns -ETIME, when the event was in the past.
 571         */
 572        res = tick_program_event(expires, 0);
 573        if (!IS_ERR_VALUE(res))
 574                *expires_next = expires;
 575        return res;
 576}
 577
 578
 579/*
 580 * Retrigger next event is called after clock was set
 581 *
 582 * Called with interrupts disabled via on_each_cpu()
 583 */
 584static void retrigger_next_event(void *arg)
 585{
 586        struct hrtimer_cpu_base *base;
 587        struct timespec realtime_offset;
 588        unsigned long seq;
 589
 590        if (!hrtimer_hres_active())
 591                return;
 592
 593        do {
 594                seq = read_seqbegin(&xtime_lock);
 595                set_normalized_timespec(&realtime_offset,
 596                                        -wall_to_monotonic.tv_sec,
 597                                        -wall_to_monotonic.tv_nsec);
 598        } while (read_seqretry(&xtime_lock, seq));
 599
 600        base = &__get_cpu_var(hrtimer_bases);
 601
 602        /* Adjust CLOCK_REALTIME offset */
 603        spin_lock(&base->lock);
 604        base->clock_base[CLOCK_REALTIME].offset =
 605                timespec_to_ktime(realtime_offset);
 606
 607        hrtimer_force_reprogram(base);
 608        spin_unlock(&base->lock);
 609}
 610
 611/*
 612 * Clock realtime was set
 613 *
 614 * Change the offset of the realtime clock vs. the monotonic
 615 * clock.
 616 *
 617 * We might have to reprogram the high resolution timer interrupt. On
 618 * SMP we call the architecture specific code to retrigger _all_ high
 619 * resolution timer interrupts. On UP we just disable interrupts and
 620 * call the high resolution interrupt code.
 621 */
 622void clock_was_set(void)
 623{
 624        /* Retrigger the CPU local events everywhere */
 625        on_each_cpu(retrigger_next_event, NULL, 1);
 626}
 627
 628/*
 629 * During resume we might have to reprogram the high resolution timer
 630 * interrupt (on the local CPU):
 631 */
 632void hres_timers_resume(void)
 633{
 634        /* Retrigger the CPU local events: */
 635        retrigger_next_event(NULL);
 636}
 637
 638/*
 639 * Initialize the high resolution related parts of cpu_base
 640 */
 641static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
 642{
 643        base->expires_next.tv64 = KTIME_MAX;
 644        base->hres_active = 0;
 645}
 646
 647/*
 648 * Initialize the high resolution related parts of a hrtimer
 649 */
 650static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 651{
 652}
 653
 654/*
 655 * When High resolution timers are active, try to reprogram. Note, that in case
 656 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
 657 * check happens. The timer gets enqueued into the rbtree. The reprogramming
 658 * and expiry check is done in the hrtimer_interrupt or in the softirq.
 659 */
 660static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 661                                            struct hrtimer_clock_base *base)
 662{
 663        if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 664
 665                /* Timer is expired, act upon the callback mode */
 666                switch(timer->cb_mode) {
 667                case HRTIMER_CB_IRQSAFE_NO_RESTART:
 668                        debug_hrtimer_deactivate(timer);
 669                        /*
 670                         * We can call the callback from here. No restart
 671                         * happens, so no danger of recursion
 672                         */
 673                        BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
 674                        return 1;
 675                case HRTIMER_CB_IRQSAFE_PERCPU:
 676                case HRTIMER_CB_IRQSAFE_UNLOCKED:
 677                        /*
 678                         * This is solely for the sched tick emulation with
 679                         * dynamic tick support to ensure that we do not
 680                         * restart the tick right on the edge and end up with
 681                         * the tick timer in the softirq ! The calling site
 682                         * takes care of this. Also used for hrtimer sleeper !
 683                         */
 684                        debug_hrtimer_deactivate(timer);
 685                        return 1;
 686                case HRTIMER_CB_IRQSAFE:
 687                case HRTIMER_CB_SOFTIRQ:
 688                        /*
 689                         * Move everything else into the softirq pending list !
 690                         */
 691                        list_add_tail(&timer->cb_entry,
 692                                      &base->cpu_base->cb_pending);
 693                        timer->state = HRTIMER_STATE_PENDING;
 694                        return 1;
 695                default:
 696                        BUG();
 697                }
 698        }
 699        return 0;
 700}
 701
 702/*
 703 * Switch to high resolution mode
 704 */
 705static int hrtimer_switch_to_hres(void)
 706{
 707        int cpu = smp_processor_id();
 708        struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 709        unsigned long flags;
 710
 711        if (base->hres_active)
 712                return 1;
 713
 714        local_irq_save(flags);
 715
 716        if (tick_init_highres()) {
 717                local_irq_restore(flags);
 718                printk(KERN_WARNING "Could not switch to high resolution "
 719                                    "mode on CPU %d\n", cpu);
 720                return 0;
 721        }
 722        base->hres_active = 1;
 723        base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
 724        base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
 725
 726        tick_setup_sched_timer();
 727
 728        /* "Retrigger" the interrupt to get things going */
 729        retrigger_next_event(NULL);
 730        local_irq_restore(flags);
 731        printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
 732               smp_processor_id());
 733        return 1;
 734}
 735
 736static inline void hrtimer_raise_softirq(void)
 737{
 738        raise_softirq(HRTIMER_SOFTIRQ);
 739}
 740
 741#else
 742
 743static inline int hrtimer_hres_active(void) { return 0; }
 744static inline int hrtimer_is_hres_enabled(void) { return 0; }
 745static inline int hrtimer_switch_to_hres(void) { return 0; }
 746static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 747static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 748                                            struct hrtimer_clock_base *base)
 749{
 750        return 0;
 751}
 752static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 753static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 754static inline int hrtimer_reprogram(struct hrtimer *timer,
 755                                    struct hrtimer_clock_base *base)
 756{
 757        return 0;
 758}
 759static inline void hrtimer_raise_softirq(void) { }
 760
 761#endif /* CONFIG_HIGH_RES_TIMERS */
 762
 763#ifdef CONFIG_TIMER_STATS
 764void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 765{
 766        if (timer->start_site)
 767                return;
 768
 769        timer->start_site = addr;
 770        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 771        timer->start_pid = current->pid;
 772}
 773#endif
 774
 775/*
 776 * Counterpart to lock_hrtimer_base above:
 777 */
 778static inline
 779void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 780{
 781        spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 782}
 783
 784/**
 785 * hrtimer_forward - forward the timer expiry
 786 * @timer:      hrtimer to forward
 787 * @now:        forward past this time
 788 * @interval:   the interval to forward
 789 *
 790 * Forward the timer expiry so it will expire in the future.
 791 * Returns the number of overruns.
 792 */
 793u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 794{
 795        u64 orun = 1;
 796        ktime_t delta;
 797
 798        delta = ktime_sub(now, timer->expires);
 799
 800        if (delta.tv64 < 0)
 801                return 0;
 802
 803        if (interval.tv64 < timer->base->resolution.tv64)
 804                interval.tv64 = timer->base->resolution.tv64;
 805
 806        if (unlikely(delta.tv64 >= interval.tv64)) {
 807                s64 incr = ktime_to_ns(interval);
 808
 809                orun = ktime_divns(delta, incr);
 810                timer->expires = ktime_add_ns(timer->expires, incr * orun);
 811                if (timer->expires.tv64 > now.tv64)
 812                        return orun;
 813                /*
 814                 * This (and the ktime_add() below) is the
 815                 * correction for exact:
 816                 */
 817                orun++;
 818        }
 819        timer->expires = ktime_add_safe(timer->expires, interval);
 820
 821        return orun;
 822}
 823EXPORT_SYMBOL_GPL(hrtimer_forward);
 824
 825/*
 826 * enqueue_hrtimer - internal function to (re)start a timer
 827 *
 828 * The timer is inserted in expiry order. Insertion into the
 829 * red black tree is O(log(n)). Must hold the base lock.
 830 */
 831static void enqueue_hrtimer(struct hrtimer *timer,
 832                            struct hrtimer_clock_base *base, int reprogram)
 833{
 834        struct rb_node **link = &base->active.rb_node;
 835        struct rb_node *parent = NULL;
 836        struct hrtimer *entry;
 837        int leftmost = 1;
 838
 839        debug_hrtimer_activate(timer);
 840
 841        /*
 842         * Find the right place in the rbtree:
 843         */
 844        while (*link) {
 845                parent = *link;
 846                entry = rb_entry(parent, struct hrtimer, node);
 847                /*
 848                 * We dont care about collisions. Nodes with
 849                 * the same expiry time stay together.
 850                 */
 851                if (timer->expires.tv64 < entry->expires.tv64) {
 852                        link = &(*link)->rb_left;
 853                } else {
 854                        link = &(*link)->rb_right;
 855                        leftmost = 0;
 856                }
 857        }
 858
 859        /*
 860         * Insert the timer to the rbtree and check whether it
 861         * replaces the first pending timer
 862         */
 863        if (leftmost) {
 864                /*
 865                 * Reprogram the clock event device. When the timer is already
 866                 * expired hrtimer_enqueue_reprogram has either called the
 867                 * callback or added it to the pending list and raised the
 868                 * softirq.
 869                 *
 870                 * This is a NOP for !HIGHRES
 871                 */
 872                if (reprogram && hrtimer_enqueue_reprogram(timer, base))
 873                        return;
 874
 875                base->first = &timer->node;
 876        }
 877
 878        rb_link_node(&timer->node, parent, link);
 879        rb_insert_color(&timer->node, &base->active);
 880        /*
 881         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
 882         * state of a possibly running callback.
 883         */
 884        timer->state |= HRTIMER_STATE_ENQUEUED;
 885}
 886
 887/*
 888 * __remove_hrtimer - internal function to remove a timer
 889 *
 890 * Caller must hold the base lock.
 891 *
 892 * High resolution timer mode reprograms the clock event device when the
 893 * timer is the one which expires next. The caller can disable this by setting
 894 * reprogram to zero. This is useful, when the context does a reprogramming
 895 * anyway (e.g. timer interrupt)
 896 */
 897static void __remove_hrtimer(struct hrtimer *timer,
 898                             struct hrtimer_clock_base *base,
 899                             unsigned long newstate, int reprogram)
 900{
 901        /* High res. callback list. NOP for !HIGHRES */
 902        if (hrtimer_cb_pending(timer))
 903                hrtimer_remove_cb_pending(timer);
 904        else {
 905                /*
 906                 * Remove the timer from the rbtree and replace the
 907                 * first entry pointer if necessary.
 908                 */
 909                if (base->first == &timer->node) {
 910                        base->first = rb_next(&timer->node);
 911                        /* Reprogram the clock event device. if enabled */
 912                        if (reprogram && hrtimer_hres_active())
 913                                hrtimer_force_reprogram(base->cpu_base);
 914                }
 915                rb_erase(&timer->node, &base->active);
 916        }
 917        timer->state = newstate;
 918}
 919
 920/*
 921 * remove hrtimer, called with base lock held
 922 */
 923static inline int
 924remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 925{
 926        if (hrtimer_is_queued(timer)) {
 927                int reprogram;
 928
 929                /*
 930                 * Remove the timer and force reprogramming when high
 931                 * resolution mode is active and the timer is on the current
 932                 * CPU. If we remove a timer on another CPU, reprogramming is
 933                 * skipped. The interrupt event on this CPU is fired and
 934                 * reprogramming happens in the interrupt handler. This is a
 935                 * rare case and less expensive than a smp call.
 936                 */
 937                debug_hrtimer_deactivate(timer);
 938                timer_stats_hrtimer_clear_start_info(timer);
 939                reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 940                __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
 941                                 reprogram);
 942                return 1;
 943        }
 944        return 0;
 945}
 946
 947/**
 948 * hrtimer_start - (re)start an relative timer on the current CPU
 949 * @timer:      the timer to be added
 950 * @tim:        expiry time
 951 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 952 *
 953 * Returns:
 954 *  0 on success
 955 *  1 when the timer was active
 956 */
 957int
 958hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 959{
 960        struct hrtimer_clock_base *base, *new_base;
 961        unsigned long flags;
 962        int ret, raise;
 963
 964        base = lock_hrtimer_base(timer, &flags);
 965
 966        /* Remove an active timer from the queue: */
 967        ret = remove_hrtimer(timer, base);
 968
 969        /* Switch the timer base, if necessary: */
 970        new_base = switch_hrtimer_base(timer, base);
 971
 972        if (mode == HRTIMER_MODE_REL) {
 973                tim = ktime_add_safe(tim, new_base->get_time());
 974                /*
 975                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 976                 * to signal that they simply return xtime in
 977                 * do_gettimeoffset(). In this case we want to round up by
 978                 * resolution when starting a relative timer, to avoid short
 979                 * timeouts. This will go away with the GTOD framework.
 980                 */
 981#ifdef CONFIG_TIME_LOW_RES
 982                tim = ktime_add_safe(tim, base->resolution);
 983#endif
 984        }
 985
 986        timer->expires = tim;
 987
 988        timer_stats_hrtimer_set_start_info(timer);
 989
 990        /*
 991         * Only allow reprogramming if the new base is on this CPU.
 992         * (it might still be on another CPU if the timer was pending)
 993         */
 994        enqueue_hrtimer(timer, new_base,
 995                        new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 996
 997        /*
 998         * The timer may be expired and moved to the cb_pending
 999         * list. We can not raise the softirq with base lock held due
1000         * to a possible deadlock with runqueue lock.
1001         */
1002        raise = timer->state == HRTIMER_STATE_PENDING;
1003
1004        /*
1005         * We use preempt_disable to prevent this task from migrating after
1006         * setting up the softirq and raising it. Otherwise, if me migrate
1007         * we will raise the softirq on the wrong CPU.
1008         */
1009        preempt_disable();
1010
1011        unlock_hrtimer_base(timer, &flags);
1012
1013        if (raise)
1014                hrtimer_raise_softirq();
1015        preempt_enable();
1016
1017        return ret;
1018}
1019EXPORT_SYMBOL_GPL(hrtimer_start);
1020
/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
 *
 * The timer is only removed when its callback function is not running
 * at the time of the call.
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;
	int ret;

	base = lock_hrtimer_base(timer, &flags);

	if (hrtimer_callback_running(timer))
		ret = -1;
	else
		ret = remove_hrtimer(timer, base);

	unlock_hrtimer_base(timer, &flags);

	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1048
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:	the timer to be cancelled
 *
 * Retries hrtimer_try_to_cancel() until it no longer reports a running
 * callback (negative return), calling cpu_relax() between attempts.
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	int ret;

	while ((ret = hrtimer_try_to_cancel(timer)) < 0)
		cpu_relax();

	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
1068
1069/**
1070 * hrtimer_get_remaining - get remaining time for the timer
1071 * @timer:      the timer to read
1072 */
1073ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1074{
1075        struct hrtimer_clock_base *base;
1076        unsigned long flags;
1077        ktime_t rem;
1078
1079        base = lock_hrtimer_base(timer, &flags);
1080        rem = ktime_sub(timer->expires, base->get_time());
1081        unlock_hrtimer_base(timer, &flags);
1082
1083        return rem;
1084}
1085EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1086
#ifdef CONFIG_NO_HZ
/**
 * hrtimer_get_next_event - get the time until next expiry event
 *
 * Scans the first timer of every clock base of the current CPU and
 * returns the smallest delta to expiry, clamped to zero. When high
 * resolution mode is active the bases are not scanned at all.
 *
 * Returns the delta to the next expiry event or KTIME_MAX if no timer
 * is pending.
 */
ktime_t hrtimer_get_next_event(void)
{
	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
	ktime_t mindelta = { .tv64 = KTIME_MAX };
	unsigned long flags;
	int i;

	spin_lock_irqsave(&cpu_base->lock, flags);

	if (hrtimer_hres_active())
		goto unlock;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		struct hrtimer_clock_base *base = &cpu_base->clock_base[i];
		struct hrtimer *timer;
		ktime_t delta;

		/* Nothing queued on this base */
		if (!base->first)
			continue;

		timer = rb_entry(base->first, struct hrtimer, node);
		delta = ktime_sub(timer->expires, base->get_time());
		if (delta.tv64 < mindelta.tv64)
			mindelta = delta;
	}

unlock:
	spin_unlock_irqrestore(&cpu_base->lock, flags);

	/* An already expired timer yields "now", never a negative delta */
	if (mindelta.tv64 < 0)
		mindelta.tv64 = 0;
	return mindelta;
}
#endif
1126
1127static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1128                           enum hrtimer_mode mode)
1129{
1130        struct hrtimer_cpu_base *cpu_base;
1131
1132        memset(timer, 0, sizeof(struct hrtimer));
1133
1134        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1135
1136        if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1137                clock_id = CLOCK_MONOTONIC;
1138
1139        timer->base = &cpu_base->clock_base[clock_id];
1140        INIT_LIST_HEAD(&timer->cb_entry);
1141        hrtimer_init_timer_hres(timer);
1142
1143#ifdef CONFIG_TIMER_STATS
1144        timer->start_site = NULL;
1145        timer->start_pid = -1;
1146        memset(timer->start_comm, 0, TASK_COMM_LEN);
1147#endif
1148}
1149
1150/**
1151 * hrtimer_init - initialize a timer to the given clock
1152 * @timer:      the timer to be initialized
1153 * @clock_id:   the clock to be used
1154 * @mode:       timer mode abs/rel
1155 */
1156void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1157                  enum hrtimer_mode mode)
1158{
1159        debug_hrtimer_init(timer);
1160        __hrtimer_init(timer, clock_id, mode);
1161}
1162EXPORT_SYMBOL_GPL(hrtimer_init);
1163
1164/**
1165 * hrtimer_get_res - get the timer resolution for a clock
1166 * @which_clock: which clock to query
1167 * @tp:          pointer to timespec variable to store the resolution
1168 *
1169 * Store the resolution of the clock selected by @which_clock in the
1170 * variable pointed to by @tp.
1171 */
1172int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1173{
1174        struct hrtimer_cpu_base *cpu_base;
1175
1176        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1177        *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
1178
1179        return 0;
1180}
1181EXPORT_SYMBOL_GPL(hrtimer_get_res);
1182
1183static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1184{
1185        spin_lock_irq(&cpu_base->lock);
1186
1187        while (!list_empty(&cpu_base->cb_pending)) {
1188                enum hrtimer_restart (*fn)(struct hrtimer *);
1189                struct hrtimer *timer;
1190                int restart;
1191
1192                timer = list_entry(cpu_base->cb_pending.next,
1193                                   struct hrtimer, cb_entry);
1194
1195                debug_hrtimer_deactivate(timer);
1196                timer_stats_account_hrtimer(timer);
1197
1198                fn = timer->function;
1199                __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1200                spin_unlock_irq(&cpu_base->lock);
1201
1202                restart = fn(timer);
1203
1204                spin_lock_irq(&cpu_base->lock);
1205
1206                timer->state &= ~HRTIMER_STATE_CALLBACK;
1207                if (restart == HRTIMER_RESTART) {
1208                        BUG_ON(hrtimer_active(timer));
1209                        /*
1210                         * Enqueue the timer, allow reprogramming of the event
1211                         * device
1212                         */
1213                        enqueue_hrtimer(timer, timer->base, 1);
1214                } else if (hrtimer_active(timer)) {
1215                        /*
1216                         * If the timer was rearmed on another CPU, reprogram
1217                         * the event device.
1218                         */
1219                        struct hrtimer_clock_base *base = timer->base;
1220
1221                        if (base->first == &timer->node &&
1222                            hrtimer_reprogram(timer, base)) {
1223                                /*
1224                                 * Timer is expired. Thus move it from tree to
1225                                 * pending list again.
1226                                 */
1227                                __remove_hrtimer(timer, base,
1228                                                 HRTIMER_STATE_PENDING, 0);
1229                                list_add_tail(&timer->cb_entry,
1230                                              &base->cpu_base->cb_pending);
1231                        }
1232                }
1233        }
1234        spin_unlock_irq(&cpu_base->lock);
1235}
1236
1237static void __run_hrtimer(struct hrtimer *timer)
1238{
1239        struct hrtimer_clock_base *base = timer->base;
1240        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1241        enum hrtimer_restart (*fn)(struct hrtimer *);
1242        int restart;
1243
1244        debug_hrtimer_deactivate(timer);
1245        __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1246        timer_stats_account_hrtimer(timer);
1247
1248        fn = timer->function;
1249        if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
1250            timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
1251                /*
1252                 * Used for scheduler timers, avoid lock inversion with
1253                 * rq->lock and tasklist_lock.
1254                 *
1255                 * These timers are required to deal with enqueue expiry
1256                 * themselves and are not allowed to migrate.
1257                 */
1258                spin_unlock(&cpu_base->lock);
1259                restart = fn(timer);
1260                spin_lock(&cpu_base->lock);
1261        } else
1262                restart = fn(timer);
1263
1264        /*
1265         * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
1266         * reprogramming of the event hardware. This happens at the end of this
1267         * function anyway.
1268         */
1269        if (restart != HRTIMER_NORESTART) {
1270                BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1271                enqueue_hrtimer(timer, base, 0);
1272        }
1273        timer->state &= ~HRTIMER_STATE_CALLBACK;
1274}
1275
1276#ifdef CONFIG_HIGH_RES_TIMERS
1277
1278/*
1279 * High resolution timer interrupt
1280 * Called with interrupts disabled
1281 */
1282void hrtimer_interrupt(struct clock_event_device *dev)
1283{
1284        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1285        struct hrtimer_clock_base *base;
1286        ktime_t expires_next, now;
1287        int i, raise = 0;
1288
1289        BUG_ON(!cpu_base->hres_active);
1290        cpu_base->nr_events++;
1291        dev->next_event.tv64 = KTIME_MAX;
1292
1293 retry:
1294        now = ktime_get();
1295
1296        expires_next.tv64 = KTIME_MAX;
1297
1298        base = cpu_base->clock_base;
1299
1300        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1301                ktime_t basenow;
1302                struct rb_node *node;
1303
1304                spin_lock(&cpu_base->lock);
1305
1306                basenow = ktime_add(now, base->offset);
1307
1308                while ((node = base->first)) {
1309                        struct hrtimer *timer;
1310
1311                        timer = rb_entry(node, struct hrtimer, node);
1312
1313                        if (basenow.tv64 < timer->expires.tv64) {
1314                                ktime_t expires;
1315
1316                                expires = ktime_sub(timer->expires,
1317                                                    base->offset);
1318                                if (expires.tv64 < expires_next.tv64)
1319                                        expires_next = expires;
1320                                break;
1321                        }
1322
1323                        /* Move softirq callbacks to the pending list */
1324                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1325                                __remove_hrtimer(timer, base,
1326                                                 HRTIMER_STATE_PENDING, 0);
1327                                list_add_tail(&timer->cb_entry,
1328                                              &base->cpu_base->cb_pending);
1329                                raise = 1;
1330                                continue;
1331                        }
1332
1333                        __run_hrtimer(timer);
1334                }
1335                spin_unlock(&cpu_base->lock);
1336                base++;
1337        }
1338
1339        cpu_base->expires_next = expires_next;
1340
1341        /* Reprogramming necessary ? */
1342        if (expires_next.tv64 != KTIME_MAX) {
1343                if (tick_program_event(expires_next, 0))
1344                        goto retry;
1345        }
1346
1347        /* Raise softirq ? */
1348        if (raise)
1349                raise_softirq(HRTIMER_SOFTIRQ);
1350}
1351
1352static void run_hrtimer_softirq(struct softirq_action *h)
1353{
1354        run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
1355}
1356
1357#endif  /* CONFIG_HIGH_RES_TIMERS */
1358
1359/*
1360 * Called from timer softirq every jiffy, expire hrtimers:
1361 *
1362 * For HRT its the fall back code to run the softirq in the timer
1363 * softirq context in case the hrtimer initialization failed or has
1364 * not been done yet.
1365 */
1366void hrtimer_run_pending(void)
1367{
1368        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1369
1370        if (hrtimer_hres_active())
1371                return;
1372
1373        /*
1374         * This _is_ ugly: We have to check in the softirq context,
1375         * whether we can switch to highres and / or nohz mode. The
1376         * clocksource switch happens in the timer interrupt with
1377         * xtime_lock held. Notification from there only sets the
1378         * check bit in the tick_oneshot code, otherwise we might
1379         * deadlock vs. xtime_lock.
1380         */
1381        if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1382                hrtimer_switch_to_hres();
1383
1384        run_hrtimer_pending(cpu_base);
1385}
1386
1387/*
1388 * Called from hardirq context every jiffy
1389 */
1390void hrtimer_run_queues(void)
1391{
1392        struct rb_node *node;
1393        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1394        struct hrtimer_clock_base *base;
1395        int index, gettime = 1;
1396
1397        if (hrtimer_hres_active())
1398                return;
1399
1400        for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1401                base = &cpu_base->clock_base[index];
1402
1403                if (!base->first)
1404                        continue;
1405
1406                if (base->get_softirq_time)
1407                        base->softirq_time = base->get_softirq_time();
1408                else if (gettime) {
1409                        hrtimer_get_softirq_time(cpu_base);
1410                        gettime = 0;
1411                }
1412
1413                spin_lock(&cpu_base->lock);
1414
1415                while ((node = base->first)) {
1416                        struct hrtimer *timer;
1417
1418                        timer = rb_entry(node, struct hrtimer, node);
1419                        if (base->softirq_time.tv64 <= timer->expires.tv64)
1420                                break;
1421
1422                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1423                                __remove_hrtimer(timer, base,
1424                                        HRTIMER_STATE_PENDING, 0);
1425                                list_add_tail(&timer->cb_entry,
1426                                        &base->cpu_base->cb_pending);
1427                                continue;
1428                        }
1429
1430                        __run_hrtimer(timer);
1431                }
1432                spin_unlock(&cpu_base->lock);
1433        }
1434}
1435
1436/*
1437 * Sleep related functions:
1438 */
1439static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1440{
1441        struct hrtimer_sleeper *t =
1442                container_of(timer, struct hrtimer_sleeper, timer);
1443        struct task_struct *task = t->task;
1444
1445        t->task = NULL;
1446        if (task)
1447                wake_up_process(task);
1448
1449        return HRTIMER_NORESTART;
1450}
1451
1452void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1453{
1454        sl->timer.function = hrtimer_wakeup;
1455        sl->task = task;
1456#ifdef CONFIG_HIGH_RES_TIMERS
1457        sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
1458#endif
1459}
1460
1461static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1462{
1463        hrtimer_init_sleeper(t, current);
1464
1465        do {
1466                set_current_state(TASK_INTERRUPTIBLE);
1467                hrtimer_start(&t->timer, t->timer.expires, mode);
1468                if (!hrtimer_active(&t->timer))
1469                        t->task = NULL;
1470
1471                if (likely(t->task))
1472                        schedule();
1473
1474                hrtimer_cancel(&t->timer);
1475                mode = HRTIMER_MODE_ABS;
1476
1477        } while (t->task && !signal_pending(current));
1478
1479        __set_current_state(TASK_RUNNING);
1480
1481        return t->task == NULL;
1482}
1483
1484static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1485{
1486        struct timespec rmt;
1487        ktime_t rem;
1488
1489        rem = ktime_sub(timer->expires, timer->base->get_time());
1490        if (rem.tv64 <= 0)
1491                return 0;
1492        rmt = ktime_to_timespec(rem);
1493
1494        if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1495                return -EFAULT;
1496
1497        return 1;
1498}
1499
1500long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1501{
1502        struct hrtimer_sleeper t;
1503        struct timespec __user  *rmtp;
1504        int ret = 0;
1505
1506        hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
1507                                HRTIMER_MODE_ABS);
1508        t.timer.expires.tv64 = restart->nanosleep.expires;
1509
1510        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1511                goto out;
1512
1513        rmtp = restart->nanosleep.rmtp;
1514        if (rmtp) {
1515                ret = update_rmtp(&t.timer, rmtp);
1516                if (ret <= 0)
1517                        goto out;
1518        }
1519
1520        /* The other values in restart are already filled in */
1521        ret = -ERESTART_RESTARTBLOCK;
1522out:
1523        destroy_hrtimer_on_stack(&t.timer);
1524        return ret;
1525}
1526
1527long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1528                       const enum hrtimer_mode mode, const clockid_t clockid)
1529{
1530        struct restart_block *restart;
1531        struct hrtimer_sleeper t;
1532        int ret = 0;
1533
1534        hrtimer_init_on_stack(&t.timer, clockid, mode);
1535        t.timer.expires = timespec_to_ktime(*rqtp);
1536        if (do_nanosleep(&t, mode))
1537                goto out;
1538
1539        /* Absolute timers do not update the rmtp value and restart: */
1540        if (mode == HRTIMER_MODE_ABS) {
1541                ret = -ERESTARTNOHAND;
1542                goto out;
1543        }
1544
1545        if (rmtp) {
1546                ret = update_rmtp(&t.timer, rmtp);
1547                if (ret <= 0)
1548                        goto out;
1549        }
1550
1551        restart = &current_thread_info()->restart_block;
1552        restart->fn = hrtimer_nanosleep_restart;
1553        restart->nanosleep.index = t.timer.base->index;
1554        restart->nanosleep.rmtp = rmtp;
1555        restart->nanosleep.expires = t.timer.expires.tv64;
1556
1557        ret = -ERESTART_RESTARTBLOCK;
1558out:
1559        destroy_hrtimer_on_stack(&t.timer);
1560        return ret;
1561}
1562
1563SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
1564                struct timespec __user *, rmtp)
1565{
1566        struct timespec tu;
1567
1568        if (copy_from_user(&tu, rqtp, sizeof(tu)))
1569                return -EFAULT;
1570
1571        if (!timespec_valid(&tu))
1572                return -EINVAL;
1573
1574        return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1575}
1576
1577/*
1578 * Functions related to boot-time initialization:
1579 */
1580static void __cpuinit init_hrtimers_cpu(int cpu)
1581{
1582        struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1583        int i;
1584
1585        spin_lock_init(&cpu_base->lock);
1586
1587        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1588                cpu_base->clock_base[i].cpu_base = cpu_base;
1589
1590        INIT_LIST_HEAD(&cpu_base->cb_pending);
1591        hrtimer_init_hres(cpu_base);
1592}
1593
1594#ifdef CONFIG_HOTPLUG_CPU
1595
1596static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1597                                struct hrtimer_clock_base *new_base, int dcpu)
1598{
1599        struct hrtimer *timer;
1600        struct rb_node *node;
1601        int raise = 0;
1602
1603        while ((node = rb_first(&old_base->active))) {
1604                timer = rb_entry(node, struct hrtimer, node);
1605                BUG_ON(hrtimer_callback_running(timer));
1606                debug_hrtimer_deactivate(timer);
1607
1608                /*
1609                 * Should not happen. Per CPU timers should be
1610                 * canceled _before_ the migration code is called
1611                 */
1612                if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
1613                        __remove_hrtimer(timer, old_base,
1614                                         HRTIMER_STATE_INACTIVE, 0);
1615                        WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
1616                             timer, timer->function, dcpu);
1617                        continue;
1618                }
1619
1620                /*
1621                 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
1622                 * timer could be seen as !active and just vanish away
1623                 * under us on another CPU
1624                 */
1625                __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
1626                timer->base = new_base;
1627                /*
1628                 * Enqueue the timer. Allow reprogramming of the event device
1629                 */
1630                enqueue_hrtimer(timer, new_base, 1);
1631
1632#ifdef CONFIG_HIGH_RES_TIMERS
1633                /*
1634                 * Happens with high res enabled when the timer was
1635                 * already expired and the callback mode is
1636                 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
1637                 * enqueue code does not move them to the soft irq
1638                 * pending list for performance/latency reasons, but
1639                 * in the migration state, we need to do that
1640                 * otherwise we end up with a stale timer.
1641                 */
1642                if (timer->state == HRTIMER_STATE_MIGRATE) {
1643                        timer->state = HRTIMER_STATE_PENDING;
1644                        list_add_tail(&timer->cb_entry,
1645                                      &new_base->cpu_base->cb_pending);
1646                        raise = 1;
1647                }
1648#endif
1649                /* Clear the migration state bit */
1650                timer->state &= ~HRTIMER_STATE_MIGRATE;
1651        }
1652        return raise;
1653}
1654
#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * Move all timers on @old_base's callback pending list to the pending
 * list of @new_base, rebinding each timer to the clock base with the
 * same index on the new cpu base.
 *
 * Returns 1 when at least one timer was moved, otherwise 0.
 */
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
				   struct hrtimer_cpu_base *new_base)
{
	struct list_head *old_pending = &old_base->cb_pending;
	int raise = 0;

	while (!list_empty(old_pending)) {
		struct hrtimer *timer;
		int idx;

		timer = list_entry(old_pending->next, struct hrtimer, cb_entry);

		/* Unlink from the old list, the timer stays PENDING */
		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);

		idx = timer->base->index;
		timer->base = &new_base->clock_base[idx];
		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
		raise = 1;
	}

	return raise;
}
#else
/* Without high resolution timers this variant has nothing to move. */
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
				   struct hrtimer_cpu_base *new_base)
{
	return 0;
}
#endif
1680
1681static void migrate_hrtimers(int cpu)
1682{
1683        struct hrtimer_cpu_base *old_base, *new_base;
1684        int i, raise = 0;
1685
1686        BUG_ON(cpu_online(cpu));
1687        old_base = &per_cpu(hrtimer_bases, cpu);
1688        new_base = &get_cpu_var(hrtimer_bases);
1689
1690        tick_cancel_sched_timer(cpu);
1691
1692        local_irq_disable();
1693        spin_lock(&new_base->lock);
1694        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1695
1696        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1697                if (migrate_hrtimer_list(&old_base->clock_base[i],
1698                                         &new_base->clock_base[i], cpu))
1699                        raise = 1;
1700        }
1701
1702        if (migrate_hrtimer_pending(old_base, new_base))
1703                raise = 1;
1704
1705        spin_unlock(&old_base->lock);
1706        spin_unlock(&new_base->lock);
1707        local_irq_enable();
1708        put_cpu_var(hrtimer_bases);
1709
1710        if (raise)
1711                hrtimer_raise_softirq();
1712}
1713#endif /* CONFIG_HOTPLUG_CPU */
1714
1715static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1716                                        unsigned long action, void *hcpu)
1717{
1718        unsigned int cpu = (long)hcpu;
1719
1720        switch (action) {
1721
1722        case CPU_UP_PREPARE:
1723        case CPU_UP_PREPARE_FROZEN:
1724                init_hrtimers_cpu(cpu);
1725                break;
1726
1727#ifdef CONFIG_HOTPLUG_CPU
1728        case CPU_DEAD:
1729        case CPU_DEAD_FROZEN:
1730                clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
1731                migrate_hrtimers(cpu);
1732                break;
1733#endif
1734
1735        default:
1736                break;
1737        }
1738
1739        return NOTIFY_OK;
1740}
1741
1742static struct notifier_block __cpuinitdata hrtimers_nb = {
1743        .notifier_call = hrtimer_cpu_notify,
1744};
1745
1746void __init hrtimers_init(void)
1747{
1748        hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1749                          (void *)(long)smp_processor_id());
1750        register_cpu_notifier(&hrtimers_nb);
1751#ifdef CONFIG_HIGH_RES_TIMERS
1752        open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
1753#endif
1754}
1755
1756
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.