linux/kernel/hrtimer.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/hrtimer.c
   3 *
   4 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   5 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   6 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
   7 *
   8 *  High-resolution kernel timers
   9 *
  10 *  In contrast to the low-resolution timeout API implemented in
  11 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
  12 *  depending on system configuration and capabilities.
  13 *
  14 *  These timers are currently used for:
  15 *   - itimers
  16 *   - POSIX timers
  17 *   - nanosleep
  18 *   - precise in-kernel timing
  19 *
  20 *  Started by: Thomas Gleixner and Ingo Molnar
  21 *
  22 *  Credits:
  23 *      based on kernel/timer.c
  24 *
  25 *      Help, testing, suggestions, bugfixes, improvements were
  26 *      provided by:
  27 *
  28 *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
  29 *      et. al.
  30 *
  31 *  For licencing details see kernel-base/COPYING
  32 */
  33
  34#include <linux/cpu.h>
  35#include <linux/irq.h>
  36#include <linux/module.h>
  37#include <linux/percpu.h>
  38#include <linux/hrtimer.h>
  39#include <linux/notifier.h>
  40#include <linux/syscalls.h>
  41#include <linux/kallsyms.h>
  42#include <linux/interrupt.h>
  43#include <linux/tick.h>
  44#include <linux/seq_file.h>
  45#include <linux/err.h>
  46#include <linux/debugobjects.h>
  47
  48#include <asm/uaccess.h>
  49
  50/**
  51 * ktime_get - get the monotonic time in ktime_t format
  52 *
  53 * returns the time in ktime_t format
  54 */
  55ktime_t ktime_get(void)
  56{
  57        struct timespec now;
  58
  59        ktime_get_ts(&now);
  60
  61        return timespec_to_ktime(now);
  62}
  63EXPORT_SYMBOL_GPL(ktime_get);
  64
  65/**
  66 * ktime_get_real - get the real (wall-) time in ktime_t format
  67 *
  68 * returns the time in ktime_t format
  69 */
  70ktime_t ktime_get_real(void)
  71{
  72        struct timespec now;
  73
  74        getnstimeofday(&now);
  75
  76        return timespec_to_ktime(now);
  77}
  78
  79EXPORT_SYMBOL_GPL(ktime_get_real);
  80
  81/*
  82 * The timer bases:
  83 *
  84 * Note: If we want to add new timer bases, we have to skip the two
  85 * clock ids captured by the cpu-timers. We do this by holding empty
  86 * entries rather than doing math adjustment of the clock ids.
  87 * This ensures that we capture erroneous accesses to these clock ids
  88 * rather than moving them into the range of valid clock id's.
  89 */
  90DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
  91{
  92
  93        .clock_base =
  94        {
  95                {
  96                        .index = CLOCK_REALTIME,
  97                        .get_time = &ktime_get_real,
  98                        .resolution = KTIME_LOW_RES,
  99                },
 100                {
 101                        .index = CLOCK_MONOTONIC,
 102                        .get_time = &ktime_get,
 103                        .resolution = KTIME_LOW_RES,
 104                },
 105        }
 106};
 107
 108/**
 109 * ktime_get_ts - get the monotonic clock in timespec format
 110 * @ts:         pointer to timespec variable
 111 *
 112 * The function calculates the monotonic clock from the realtime
 113 * clock and the wall_to_monotonic offset and stores the result
 114 * in normalized timespec format in the variable pointed to by @ts.
 115 */
 116void ktime_get_ts(struct timespec *ts)
 117{
 118        struct timespec tomono;
 119        unsigned long seq;
 120
 121        do {
 122                seq = read_seqbegin(&xtime_lock);
 123                getnstimeofday(ts);
 124                tomono = wall_to_monotonic;
 125
 126        } while (read_seqretry(&xtime_lock, seq));
 127
 128        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
 129                                ts->tv_nsec + tomono.tv_nsec);
 130}
 131EXPORT_SYMBOL_GPL(ktime_get_ts);
 132
 133/*
 134 * Get the coarse grained time at the softirq based on xtime and
 135 * wall_to_monotonic.
 136 */
 137static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 138{
 139        ktime_t xtim, tomono;
 140        struct timespec xts, tom;
 141        unsigned long seq;
 142
 143        do {
 144                seq = read_seqbegin(&xtime_lock);
 145                xts = current_kernel_time();
 146                tom = wall_to_monotonic;
 147        } while (read_seqretry(&xtime_lock, seq));
 148
 149        xtim = timespec_to_ktime(xts);
 150        tomono = timespec_to_ktime(tom);
 151        base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 152        base->clock_base[CLOCK_MONOTONIC].softirq_time =
 153                ktime_add(xtim, tomono);
 154}
 155
 156/*
 157 * Functions and macros which are different for UP/SMP systems are kept in a
 158 * single place
 159 */
 160#ifdef CONFIG_SMP
 161
/*
 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on the lists/queues.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 *
 * lock_hrtimer_base() returns the clock base of @timer with the lock of
 * its cpu_base held and the previous interrupt state saved in *@flags.
 * It loops until it holds the lock of the base the timer currently
 * points to.
 */
static
struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
                                             unsigned long *flags)
{
        struct hrtimer_clock_base *base;

        for (;;) {
                /* A NULL base means the timer is being moved; retry */
                base = timer->base;
                if (likely(base != NULL)) {
                        spin_lock_irqsave(&base->cpu_base->lock, *flags);
                        /*
                         * Re-check under the lock: timer->base may have
                         * changed between the read above and the lock.
                         */
                        if (likely(base == timer->base))
                                return base;
                        /* The timer has migrated to another CPU: */
                        spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
                }
                cpu_relax();
        }
}
 192
/*
 * Switch the timer base to the current CPU when possible.
 *
 * Entered with the lock of @base->cpu_base held. Returns the clock base
 * the timer is attached to afterwards: either @base itself (nothing to
 * switch, or the callback is running) or the matching clock base of the
 * current CPU. In the latter case the old lock has been dropped and the
 * lock of the new cpu_base is held on return.
 */
static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
{
        struct hrtimer_clock_base *new_base;
        struct hrtimer_cpu_base *new_cpu_base;

        /* Same clock id, but in the bases of the CPU we run on */
        new_cpu_base = &__get_cpu_var(hrtimer_bases);
        new_base = &new_cpu_base->clock_base[base->index];

        if (base != new_base) {
                /*
                 * We are trying to schedule the timer on the local CPU.
                 * However we can't change timer's base while it is running,
                 * so we keep it on the same CPU. No hassle vs. reprogramming
                 * the event source in the high resolution case. The softirq
                 * code will take care of this when the timer function has
                 * completed. There is no conflict as we hold the lock until
                 * the timer is enqueued.
                 */
                if (unlikely(hrtimer_callback_running(timer)))
                        return base;

                /*
                 * See the comment in lock_hrtimer_base(): with the base
                 * set to NULL the timer stays locked while the locks are
                 * swapped.
                 */
                timer->base = NULL;
                /*
                 * NOTE(review): plain spin_unlock/spin_lock is used here,
                 * presumably because interrupts were already disabled when
                 * the old lock was taken - confirm against the callers.
                 */
                spin_unlock(&base->cpu_base->lock);
                spin_lock(&new_base->cpu_base->lock);
                timer->base = new_base;
        }
        return new_base;
}
 226
 227#else /* CONFIG_SMP */
 228
 229static inline struct hrtimer_clock_base *
 230lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 231{
 232        struct hrtimer_clock_base *base = timer->base;
 233
 234        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 235
 236        return base;
 237}
 238
/*
 * UP variant: nothing to switch, the macro simply yields the base which
 * was passed in. The timer argument is not evaluated.
 */
# define switch_hrtimer_base(t, b)      (b)
 240
 241#endif  /* !CONFIG_SMP */
 242
 243/*
 244 * Functions for the union type storage format of ktime_t which are
 245 * too large for inlining:
 246 */
 247#if BITS_PER_LONG < 64
 248# ifndef CONFIG_KTIME_SCALAR
 249/**
 250 * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
 251 * @kt:         addend
 252 * @nsec:       the scalar nsec value to add
 253 *
 254 * Returns the sum of kt and nsec in ktime_t format
 255 */
 256ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 257{
 258        ktime_t tmp;
 259
 260        if (likely(nsec < NSEC_PER_SEC)) {
 261                tmp.tv64 = nsec;
 262        } else {
 263                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 264
 265                tmp = ktime_set((long)nsec, rem);
 266        }
 267
 268        return ktime_add(kt, tmp);
 269}
 270
 271EXPORT_SYMBOL_GPL(ktime_add_ns);
 272
 273/**
 274 * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
 275 * @kt:         minuend
 276 * @nsec:       the scalar nsec value to subtract
 277 *
 278 * Returns the subtraction of @nsec from @kt in ktime_t format
 279 */
 280ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
 281{
 282        ktime_t tmp;
 283
 284        if (likely(nsec < NSEC_PER_SEC)) {
 285                tmp.tv64 = nsec;
 286        } else {
 287                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 288
 289                tmp = ktime_set((long)nsec, rem);
 290        }
 291
 292        return ktime_sub(kt, tmp);
 293}
 294
 295EXPORT_SYMBOL_GPL(ktime_sub_ns);
 296# endif /* !CONFIG_KTIME_SCALAR */
 297
 298/*
 299 * Divide a ktime value by a nanosecond value
 300 */
 301u64 ktime_divns(const ktime_t kt, s64 div)
 302{
 303        u64 dclc;
 304        int sft = 0;
 305
 306        dclc = ktime_to_ns(kt);
 307        /* Make sure the divisor is less than 2^32: */
 308        while (div >> 32) {
 309                sft++;
 310                div >>= 1;
 311        }
 312        dclc >>= sft;
 313        do_div(dclc, (unsigned long) div);
 314
 315        return dclc;
 316}
#endif /* BITS_PER_LONG < 64 */
 318
 319/*
 320 * Add two ktime values and do a safety check for overflow:
 321 */
 322ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 323{
 324        ktime_t res = ktime_add(lhs, rhs);
 325
 326        /*
 327         * We use KTIME_SEC_MAX here, the maximum timeout which we can
 328         * return to user space in a timespec:
 329         */
 330        if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
 331                res = ktime_set(KTIME_SEC_MAX, 0);
 332
 333        return res;
 334}
 335
 336#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 337
/*
 * Forward declaration: the fixup callbacks below pass the descriptor to
 * the debug object functions before its initializer appears.
 */
static struct debug_obj_descr hrtimer_debug_descr;
 339
 340/*
 341 * fixup_init is called when:
 342 * - an active object is initialized
 343 */
 344static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
 345{
 346        struct hrtimer *timer = addr;
 347
 348        switch (state) {
 349        case ODEBUG_STATE_ACTIVE:
 350                hrtimer_cancel(timer);
 351                debug_object_init(timer, &hrtimer_debug_descr);
 352                return 1;
 353        default:
 354                return 0;
 355        }
 356}
 357
 358/*
 359 * fixup_activate is called when:
 360 * - an active object is activated
 361 * - an unknown object is activated (might be a statically initialized object)
 362 */
 363static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
 364{
 365        switch (state) {
 366
 367        case ODEBUG_STATE_NOTAVAILABLE:
 368                WARN_ON_ONCE(1);
 369                return 0;
 370
 371        case ODEBUG_STATE_ACTIVE:
 372                WARN_ON(1);
 373
 374        default:
 375                return 0;
 376        }
 377}
 378
 379/*
 380 * fixup_free is called when:
 381 * - an active object is freed
 382 */
 383static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
 384{
 385        struct hrtimer *timer = addr;
 386
 387        switch (state) {
 388        case ODEBUG_STATE_ACTIVE:
 389                hrtimer_cancel(timer);
 390                debug_object_free(timer, &hrtimer_debug_descr);
 391                return 1;
 392        default:
 393                return 0;
 394        }
 395}
 396
/*
 * Debug object descriptor for hrtimers: the name under which the objects
 * are tracked plus the fixup callbacks defined above.
 */
static struct debug_obj_descr hrtimer_debug_descr = {
        .name           = "hrtimer",
        .fixup_init     = hrtimer_fixup_init,
        .fixup_activate = hrtimer_fixup_activate,
        .fixup_free     = hrtimer_fixup_free,
};
 403
/* Report the initialization of @timer to the debug object code */
static inline void debug_hrtimer_init(struct hrtimer *timer)
{
        debug_object_init(timer, &hrtimer_debug_descr);
}
 408
/* Report the activation of @timer to the debug object code */
static inline void debug_hrtimer_activate(struct hrtimer *timer)
{
        debug_object_activate(timer, &hrtimer_debug_descr);
}
 413
/* Report the deactivation of @timer to the debug object code */
static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
{
        debug_object_deactivate(timer, &hrtimer_debug_descr);
}
 418
/* Report the release of @timer to the debug object code */
static inline void debug_hrtimer_free(struct hrtimer *timer)
{
        debug_object_free(timer, &hrtimer_debug_descr);
}
 423
/* Defined further down in this file; needed by hrtimer_init_on_stack() */
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
                           enum hrtimer_mode mode);

/*
 * hrtimer_init_on_stack - initialize a timer which lives on the stack
 * @timer:      the timer to be initialized
 * @clock_id:   passed through to __hrtimer_init()
 * @mode:       passed through to __hrtimer_init()
 *
 * Registers the timer as an on-stack debug object before doing the
 * regular initialization.
 */
void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
                           enum hrtimer_mode mode)
{
        debug_object_init_on_stack(timer, &hrtimer_debug_descr);
        __hrtimer_init(timer, clock_id, mode);
}
 433
/*
 * destroy_hrtimer_on_stack - release the debug object of @timer
 *
 * Only the debug object is freed; the timer itself is not touched here.
 */
void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
        debug_object_free(timer, &hrtimer_debug_descr);
}
 438
 439#else
/* Empty stand-ins for kernels built without CONFIG_DEBUG_OBJECTS_TIMERS */
static inline void debug_hrtimer_init(struct hrtimer *timer) { }
static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 443#endif
 444
/*
 * Check, whether the timer is on the callback pending list
 *
 * Returns non-zero when HRTIMER_STATE_PENDING is set in timer->state.
 */
static inline int hrtimer_cb_pending(const struct hrtimer *timer)
{
        return timer->state & HRTIMER_STATE_PENDING;
}
 452
/*
 * Remove a timer from the callback pending list
 *
 * The list entry is reinitialized by list_del_init(); timer->state is
 * left for the caller to update.
 */
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
{
        list_del_init(&timer->cb_entry);
}
 460
 461/* High resolution timer related functions */
 462#ifdef CONFIG_HIGH_RES_TIMERS
 463
/*
 * High resolution timer enabled ?
 *
 * Defaults to 1 and can be changed with the "highres=" boot parameter,
 * see setup_hrtimer_hres().
 */
static int hrtimer_hres_enabled __read_mostly  = 1;
 468
 469/*
 470 * Enable / Disable high resolution mode
 471 */
 472static int __init setup_hrtimer_hres(char *str)
 473{
 474        if (!strcmp(str, "off"))
 475                hrtimer_hres_enabled = 0;
 476        else if (!strcmp(str, "on"))
 477                hrtimer_hres_enabled = 1;
 478        else
 479                return 0;
 480        return 1;
 481}
 482
 483__setup("highres=", setup_hrtimer_hres);
 484
/*
 * hrtimer_is_hres_enabled - query, if the highres mode is enabled
 *
 * Returns the value of the hrtimer_hres_enabled switch. Whether high
 * resolution mode is actually active on this CPU is reported by
 * hrtimer_hres_active().
 */
static inline int hrtimer_is_hres_enabled(void)
{
        return hrtimer_hres_enabled;
}
 492
/*
 * Is the high resolution mode active ?
 *
 * Returns the hres_active flag of the current CPU's hrtimer_bases.
 */
static inline int hrtimer_hres_active(void)
{
        return __get_cpu_var(hrtimer_bases).hres_active;
}
 500
 501/*
 502 * Reprogram the event source with checking both queues for the
 503 * next event
 504 * Called with interrupts disabled and base->lock held
 505 */
 506static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 507{
 508        int i;
 509        struct hrtimer_clock_base *base = cpu_base->clock_base;
 510        ktime_t expires;
 511
 512        cpu_base->expires_next.tv64 = KTIME_MAX;
 513
 514        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 515                struct hrtimer *timer;
 516
 517                if (!base->first)
 518                        continue;
 519                timer = rb_entry(base->first, struct hrtimer, node);
 520                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 521                if (expires.tv64 < cpu_base->expires_next.tv64)
 522                        cpu_base->expires_next = expires;
 523        }
 524
 525        if (cpu_base->expires_next.tv64 != KTIME_MAX)
 526                tick_program_event(cpu_base->expires_next, 1);
 527}
 528
 529/*
 530 * Shared reprogramming for clock_realtime and clock_monotonic
 531 *
 532 * When a timer is enqueued and expires earlier than the already enqueued
 533 * timers, we have to check, whether it expires earlier than the timer for
 534 * which the clock event device was armed.
 535 *
 536 * Called with interrupts disabled and base->cpu_base.lock held
 537 */
 538static int hrtimer_reprogram(struct hrtimer *timer,
 539                             struct hrtimer_clock_base *base)
 540{
 541        ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
 542        ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 543        int res;
 544
 545        WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 546
 547        /*
 548         * When the callback is running, we do not reprogram the clock event
 549         * device. The timer callback is either running on a different CPU or
 550         * the callback is executed in the hrtimer_interrupt context. The
 551         * reprogramming is handled either by the softirq, which called the
 552         * callback or at the end of the hrtimer_interrupt.
 553         */
 554        if (hrtimer_callback_running(timer))
 555                return 0;
 556
 557        /*
 558         * CLOCK_REALTIME timer might be requested with an absolute
 559         * expiry time which is less than base->offset. Nothing wrong
 560         * about that, just avoid to call into the tick code, which
 561         * has now objections against negative expiry values.
 562         */
 563        if (expires.tv64 < 0)
 564                return -ETIME;
 565
 566        if (expires.tv64 >= expires_next->tv64)
 567                return 0;
 568
 569        /*
 570         * Clockevents returns -ETIME, when the event was in the past.
 571         */
 572        res = tick_program_event(expires, 0);
 573        if (!IS_ERR_VALUE(res))
 574                *expires_next = expires;
 575        return res;
 576}
 577
 578
 579/*
 580 * Retrigger next event is called after clock was set
 581 *
 582 * Called with interrupts disabled via on_each_cpu()
 583 */
 584static void retrigger_next_event(void *arg)
 585{
 586        struct hrtimer_cpu_base *base;
 587        struct timespec realtime_offset;
 588        unsigned long seq;
 589
 590        if (!hrtimer_hres_active())
 591                return;
 592
 593        do {
 594                seq = read_seqbegin(&xtime_lock);
 595                set_normalized_timespec(&realtime_offset,
 596                                        -wall_to_monotonic.tv_sec,
 597                                        -wall_to_monotonic.tv_nsec);
 598        } while (read_seqretry(&xtime_lock, seq));
 599
 600        base = &__get_cpu_var(hrtimer_bases);
 601
 602        /* Adjust CLOCK_REALTIME offset */
 603        spin_lock(&base->lock);
 604        base->clock_base[CLOCK_REALTIME].offset =
 605                timespec_to_ktime(realtime_offset);
 606
 607        hrtimer_force_reprogram(base);
 608        spin_unlock(&base->lock);
 609}
 610
/*
 * Clock realtime was set
 *
 * Change the offset of the realtime clock vs. the monotonic
 * clock.
 *
 * We might have to reprogram the high resolution timer interrupt. On
 * SMP we call the architecture specific code to retrigger _all_ high
 * resolution timer interrupts. On UP we just disable interrupts and
 * call the high resolution interrupt code.
 *
 * The actual work is done by retrigger_next_event(), which is run on
 * every CPU through on_each_cpu().
 */
void clock_was_set(void)
{
        /* Retrigger the CPU local events everywhere */
        on_each_cpu(retrigger_next_event, NULL, 1);
}
 627
/*
 * During resume we might have to reprogram the high resolution timer
 * interrupt (on the local CPU):
 *
 * NOTE(review): retrigger_next_event() documents that it expects
 * interrupts to be disabled - presumably guaranteed by the resume path,
 * confirm against the caller.
 */
void hres_timers_resume(void)
{
        /* Retrigger the CPU local events: */
        retrigger_next_event(NULL);
}
 637
/*
 * Initialize the high resolution related parts of cpu_base
 *
 * No event is armed (expires_next = KTIME_MAX) and high resolution mode
 * starts out inactive.
 */
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
        base->expires_next.tv64 = KTIME_MAX;
        base->hres_active = 0;
}
 646
/*
 * Initialize the high resolution related parts of a hrtimer
 *
 * Currently there is nothing to initialize; the body is intentionally
 * empty.
 */
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
}
 653
 654/*
 655 * When High resolution timers are active, try to reprogram. Note, that in case
 656 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
 657 * check happens. The timer gets enqueued into the rbtree. The reprogramming
 658 * and expiry check is done in the hrtimer_interrupt or in the softirq.
 659 */
 660static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 661                                            struct hrtimer_clock_base *base)
 662{
 663        if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 664
 665                /* Timer is expired, act upon the callback mode */
 666                switch(timer->cb_mode) {
 667                case HRTIMER_CB_IRQSAFE_PERCPU:
 668                case HRTIMER_CB_IRQSAFE_UNLOCKED:
 669                        /*
 670                         * This is solely for the sched tick emulation with
 671                         * dynamic tick support to ensure that we do not
 672                         * restart the tick right on the edge and end up with
 673                         * the tick timer in the softirq ! The calling site
 674                         * takes care of this. Also used for hrtimer sleeper !
 675                         */
 676                        debug_hrtimer_deactivate(timer);
 677                        return 1;
 678                case HRTIMER_CB_SOFTIRQ:
 679                        /*
 680                         * Move everything else into the softirq pending list !
 681                         */
 682                        list_add_tail(&timer->cb_entry,
 683                                      &base->cpu_base->cb_pending);
 684                        timer->state = HRTIMER_STATE_PENDING;
 685                        return 1;
 686                default:
 687                        BUG();
 688                }
 689        }
 690        return 0;
 691}
 692
 693/*
 694 * Switch to high resolution mode
 695 */
 696static int hrtimer_switch_to_hres(void)
 697{
 698        int cpu = smp_processor_id();
 699        struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 700        unsigned long flags;
 701
 702        if (base->hres_active)
 703                return 1;
 704
 705        local_irq_save(flags);
 706
 707        if (tick_init_highres()) {
 708                local_irq_restore(flags);
 709                printk(KERN_WARNING "Could not switch to high resolution "
 710                                    "mode on CPU %d\n", cpu);
 711                return 0;
 712        }
 713        base->hres_active = 1;
 714        base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
 715        base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
 716
 717        tick_setup_sched_timer();
 718
 719        /* "Retrigger" the interrupt to get things going */
 720        retrigger_next_event(NULL);
 721        local_irq_restore(flags);
 722        printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
 723               smp_processor_id());
 724        return 1;
 725}
 726
/* Raise the hrtimer softirq (HRTIMER_SOFTIRQ) */
static inline void hrtimer_raise_softirq(void)
{
        raise_softirq(HRTIMER_SOFTIRQ);
}
 731
 732#else
 733
/*
 * Stand-ins for kernels built without CONFIG_HIGH_RES_TIMERS: the queries
 * and the reprogramming helpers return 0, the remaining helpers do
 * nothing.
 */
static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
                                            struct hrtimer_clock_base *base)
{
        return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
static inline int hrtimer_reprogram(struct hrtimer *timer,
                                    struct hrtimer_clock_base *base)
{
        return 0;
}
static inline void hrtimer_raise_softirq(void) { }
 751
 752#endif /* CONFIG_HIGH_RES_TIMERS */
 753
 754#ifdef CONFIG_TIMER_STATS
 755void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 756{
 757        if (timer->start_site)
 758                return;
 759
 760        timer->start_site = addr;
 761        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 762        timer->start_pid = current->pid;
 763}
 764#endif
 765
/*
 * Counterpart to lock_hrtimer_base above:
 *
 * Drops the lock of the cpu_base which timer->base currently points to
 * and restores the interrupt state saved in *@flags.
 */
static inline
void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
        spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
}
 774
 775/**
 776 * hrtimer_forward - forward the timer expiry
 777 * @timer:      hrtimer to forward
 778 * @now:        forward past this time
 779 * @interval:   the interval to forward
 780 *
 781 * Forward the timer expiry so it will expire in the future.
 782 * Returns the number of overruns.
 783 */
 784u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 785{
 786        u64 orun = 1;
 787        ktime_t delta;
 788
 789        delta = ktime_sub(now, hrtimer_get_expires(timer));
 790
 791        if (delta.tv64 < 0)
 792                return 0;
 793
 794        if (interval.tv64 < timer->base->resolution.tv64)
 795                interval.tv64 = timer->base->resolution.tv64;
 796
 797        if (unlikely(delta.tv64 >= interval.tv64)) {
 798                s64 incr = ktime_to_ns(interval);
 799
 800                orun = ktime_divns(delta, incr);
 801                hrtimer_add_expires_ns(timer, incr * orun);
 802                if (hrtimer_get_expires_tv64(timer) > now.tv64)
 803                        return orun;
 804                /*
 805                 * This (and the ktime_add() below) is the
 806                 * correction for exact:
 807                 */
 808                orun++;
 809        }
 810        hrtimer_add_expires(timer, interval);
 811
 812        return orun;
 813}
 814EXPORT_SYMBOL_GPL(hrtimer_forward);
 815
 816/*
 817 * enqueue_hrtimer - internal function to (re)start a timer
 818 *
 819 * The timer is inserted in expiry order. Insertion into the
 820 * red black tree is O(log(n)). Must hold the base lock.
 821 */
 822static void enqueue_hrtimer(struct hrtimer *timer,
 823                            struct hrtimer_clock_base *base, int reprogram)
 824{
 825        struct rb_node **link = &base->active.rb_node;
 826        struct rb_node *parent = NULL;
 827        struct hrtimer *entry;
 828        int leftmost = 1;
 829
 830        debug_hrtimer_activate(timer);
 831
 832        /*
 833         * Find the right place in the rbtree:
 834         */
 835        while (*link) {
 836                parent = *link;
 837                entry = rb_entry(parent, struct hrtimer, node);
 838                /*
 839                 * We dont care about collisions. Nodes with
 840                 * the same expiry time stay together.
 841                 */
 842                if (hrtimer_get_expires_tv64(timer) <
 843                                hrtimer_get_expires_tv64(entry)) {
 844                        link = &(*link)->rb_left;
 845                } else {
 846                        link = &(*link)->rb_right;
 847                        leftmost = 0;
 848                }
 849        }
 850
 851        /*
 852         * Insert the timer to the rbtree and check whether it
 853         * replaces the first pending timer
 854         */
 855        if (leftmost) {
 856                /*
 857                 * Reprogram the clock event device. When the timer is already
 858                 * expired hrtimer_enqueue_reprogram has either called the
 859                 * callback or added it to the pending list and raised the
 860                 * softirq.
 861                 *
 862                 * This is a NOP for !HIGHRES
 863                 */
 864                if (reprogram && hrtimer_enqueue_reprogram(timer, base))
 865                        return;
 866
 867                base->first = &timer->node;
 868        }
 869
 870        rb_link_node(&timer->node, parent, link);
 871        rb_insert_color(&timer->node, &base->active);
 872        /*
 873         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
 874         * state of a possibly running callback.
 875         */
 876        timer->state |= HRTIMER_STATE_ENQUEUED;
 877}
 878
 879/*
 880 * __remove_hrtimer - internal function to remove a timer
 881 *
 882 * Caller must hold the base lock.
 883 *
 884 * High resolution timer mode reprograms the clock event device when the
 885 * timer is the one which expires next. The caller can disable this by setting
 886 * reprogram to zero. This is useful, when the context does a reprogramming
 887 * anyway (e.g. timer interrupt)
 888 */
 889static void __remove_hrtimer(struct hrtimer *timer,
 890                             struct hrtimer_clock_base *base,
 891                             unsigned long newstate, int reprogram)
 892{
 893        /* High res. callback list. NOP for !HIGHRES */
 894        if (hrtimer_cb_pending(timer))
 895                hrtimer_remove_cb_pending(timer);
 896        else {
 897                /*
 898                 * Remove the timer from the rbtree and replace the
 899                 * first entry pointer if necessary.
 900                 */
 901                if (base->first == &timer->node) {
 902                        base->first = rb_next(&timer->node);
 903                        /* Reprogram the clock event device. if enabled */
 904                        if (reprogram && hrtimer_hres_active())
 905                                hrtimer_force_reprogram(base->cpu_base);
 906                }
 907                rb_erase(&timer->node, &base->active);
 908        }
 909        timer->state = newstate;
 910}
 911
 912/*
 913 * remove hrtimer, called with base lock held
 914 */
 915static inline int
 916remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 917{
 918        if (hrtimer_is_queued(timer)) {
 919                int reprogram;
 920
 921                /*
 922                 * Remove the timer and force reprogramming when high
 923                 * resolution mode is active and the timer is on the current
 924                 * CPU. If we remove a timer on another CPU, reprogramming is
 925                 * skipped. The interrupt event on this CPU is fired and
 926                 * reprogramming happens in the interrupt handler. This is a
 927                 * rare case and less expensive than a smp call.
 928                 */
 929                debug_hrtimer_deactivate(timer);
 930                timer_stats_hrtimer_clear_start_info(timer);
 931                reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 932                __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
 933                                 reprogram);
 934                return 1;
 935        }
 936        return 0;
 937}
 938
 939/**
 940 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 941 * @timer:      the timer to be added
 942 * @tim:        expiry time
 943 * @delta_ns:   "slack" range for the timer
 944 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 945 *
 946 * Returns:
 947 *  0 on success
 948 *  1 when the timer was active
 949 */
 950int
 951hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
 952                        const enum hrtimer_mode mode)
 953{
 954        struct hrtimer_clock_base *base, *new_base;
 955        unsigned long flags;
 956        int ret, raise;
 957
 958        base = lock_hrtimer_base(timer, &flags);
 959
 960        /* Remove an active timer from the queue: */
 961        ret = remove_hrtimer(timer, base);
 962
 963        /* Switch the timer base, if necessary: */
 964        new_base = switch_hrtimer_base(timer, base);
 965
 966        if (mode == HRTIMER_MODE_REL) {
 967                tim = ktime_add_safe(tim, new_base->get_time());
 968                /*
 969                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 970                 * to signal that they simply return xtime in
 971                 * do_gettimeoffset(). In this case we want to round up by
 972                 * resolution when starting a relative timer, to avoid short
 973                 * timeouts. This will go away with the GTOD framework.
 974                 */
 975#ifdef CONFIG_TIME_LOW_RES
 976                tim = ktime_add_safe(tim, base->resolution);
 977#endif
 978        }
 979
 980        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 981
 982        timer_stats_hrtimer_set_start_info(timer);
 983
 984        /*
 985         * Only allow reprogramming if the new base is on this CPU.
 986         * (it might still be on another CPU if the timer was pending)
 987         */
 988        enqueue_hrtimer(timer, new_base,
 989                        new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 990
 991        /*
 992         * The timer may be expired and moved to the cb_pending
 993         * list. We can not raise the softirq with base lock held due
 994         * to a possible deadlock with runqueue lock.
 995         */
 996        raise = timer->state == HRTIMER_STATE_PENDING;
 997
 998        /*
 999         * We use preempt_disable to prevent this task from migrating after
1000         * setting up the softirq and raising it. Otherwise, if me migrate
1001         * we will raise the softirq on the wrong CPU.
1002         */
1003        preempt_disable();
1004
1005        unlock_hrtimer_base(timer, &flags);
1006
1007        if (raise)
1008                hrtimer_raise_softirq();
1009        preempt_enable();
1010
1011        return ret;
1012}
1013EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1014
1015/**
1016 * hrtimer_start - (re)start an hrtimer on the current CPU
1017 * @timer:      the timer to be added
1018 * @tim:        expiry time
1019 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
1020 *
1021 * Returns:
1022 *  0 on success
1023 *  1 when the timer was active
1024 */
1025int
1026hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1027{
1028        return hrtimer_start_range_ns(timer, tim, 0, mode);
1029}
1030EXPORT_SYMBOL_GPL(hrtimer_start);
1031
1032
/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;
	int ret;

	base = lock_hrtimer_base(timer, &flags);

	/* A timer whose callback is running cannot be dequeued here */
	if (hrtimer_callback_running(timer))
		ret = -1;
	else
		ret = remove_hrtimer(timer, base);

	unlock_hrtimer_base(timer, &flags);

	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1060
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:	the timer to be cancelled
 *
 * Retries hrtimer_try_to_cancel() until it no longer reports a running
 * callback (-1), relaxing the CPU between attempts.
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	int ret;

	while ((ret = hrtimer_try_to_cancel(timer)) < 0)
		cpu_relax();

	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
1080
1081/**
1082 * hrtimer_get_remaining - get remaining time for the timer
1083 * @timer:      the timer to read
1084 */
1085ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1086{
1087        struct hrtimer_clock_base *base;
1088        unsigned long flags;
1089        ktime_t rem;
1090
1091        base = lock_hrtimer_base(timer, &flags);
1092        rem = hrtimer_expires_remaining(timer);
1093        unlock_hrtimer_base(timer, &flags);
1094
1095        return rem;
1096}
1097EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1098
#ifdef CONFIG_NO_HZ
/**
 * hrtimer_get_next_event - get the time until next expiry event
 *
 * Returns the delta to the next expiry event or KTIME_MAX if no timer
 * is pending. The clock bases are only scanned while high resolution
 * mode is not active; a negative delta is clamped to zero.
 */
ktime_t hrtimer_get_next_event(void)
{
	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
	ktime_t nearest = { .tv64 = KTIME_MAX };
	unsigned long flags;
	int idx;

	spin_lock_irqsave(&cpu_base->lock, flags);

	if (hrtimer_hres_active())
		goto unlock;

	for (idx = 0; idx < HRTIMER_MAX_CLOCK_BASES; idx++) {
		struct hrtimer_clock_base *base = &cpu_base->clock_base[idx];
		struct hrtimer *timer;
		ktime_t delta;

		if (!base->first)
			continue;

		/* Distance from this base's "now" to its first timer */
		timer = rb_entry(base->first, struct hrtimer, node);
		delta.tv64 = hrtimer_get_expires_tv64(timer);
		delta = ktime_sub(delta, base->get_time());
		if (delta.tv64 < nearest.tv64)
			nearest.tv64 = delta.tv64;
	}

unlock:
	spin_unlock_irqrestore(&cpu_base->lock, flags);

	if (nearest.tv64 < 0)
		nearest.tv64 = 0;
	return nearest;
}
#endif
1138
1139static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1140                           enum hrtimer_mode mode)
1141{
1142        struct hrtimer_cpu_base *cpu_base;
1143
1144        memset(timer, 0, sizeof(struct hrtimer));
1145
1146        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1147
1148        if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1149                clock_id = CLOCK_MONOTONIC;
1150
1151        timer->base = &cpu_base->clock_base[clock_id];
1152        INIT_LIST_HEAD(&timer->cb_entry);
1153        hrtimer_init_timer_hres(timer);
1154
1155#ifdef CONFIG_TIMER_STATS
1156        timer->start_site = NULL;
1157        timer->start_pid = -1;
1158        memset(timer->start_comm, 0, TASK_COMM_LEN);
1159#endif
1160}
1161
1162/**
1163 * hrtimer_init - initialize a timer to the given clock
1164 * @timer:      the timer to be initialized
1165 * @clock_id:   the clock to be used
1166 * @mode:       timer mode abs/rel
1167 */
1168void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1169                  enum hrtimer_mode mode)
1170{
1171        debug_hrtimer_init(timer);
1172        __hrtimer_init(timer, clock_id, mode);
1173}
1174EXPORT_SYMBOL_GPL(hrtimer_init);
1175
1176/**
1177 * hrtimer_get_res - get the timer resolution for a clock
1178 * @which_clock: which clock to query
1179 * @tp:          pointer to timespec variable to store the resolution
1180 *
1181 * Store the resolution of the clock selected by @which_clock in the
1182 * variable pointed to by @tp.
1183 */
1184int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1185{
1186        struct hrtimer_cpu_base *cpu_base;
1187
1188        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1189        *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
1190
1191        return 0;
1192}
1193EXPORT_SYMBOL_GPL(hrtimer_get_res);
1194
1195static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1196{
1197        spin_lock_irq(&cpu_base->lock);
1198
1199        while (!list_empty(&cpu_base->cb_pending)) {
1200                enum hrtimer_restart (*fn)(struct hrtimer *);
1201                struct hrtimer *timer;
1202                int restart;
1203                int emulate_hardirq_ctx = 0;
1204
1205                timer = list_entry(cpu_base->cb_pending.next,
1206                                   struct hrtimer, cb_entry);
1207
1208                debug_hrtimer_deactivate(timer);
1209                timer_stats_account_hrtimer(timer);
1210
1211                fn = timer->function;
1212                /*
1213                 * A timer might have been added to the cb_pending list
1214                 * when it was migrated during a cpu-offline operation.
1215                 * Emulate hardirq context for such timers.
1216                 */
1217                if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
1218                    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
1219                        emulate_hardirq_ctx = 1;
1220
1221                __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1222                spin_unlock_irq(&cpu_base->lock);
1223
1224                if (unlikely(emulate_hardirq_ctx)) {
1225                        local_irq_disable();
1226                        restart = fn(timer);
1227                        local_irq_enable();
1228                } else
1229                        restart = fn(timer);
1230
1231                spin_lock_irq(&cpu_base->lock);
1232
1233                timer->state &= ~HRTIMER_STATE_CALLBACK;
1234                if (restart == HRTIMER_RESTART) {
1235                        BUG_ON(hrtimer_active(timer));
1236                        /*
1237                         * Enqueue the timer, allow reprogramming of the event
1238                         * device
1239                         */
1240                        enqueue_hrtimer(timer, timer->base, 1);
1241                } else if (hrtimer_active(timer)) {
1242                        /*
1243                         * If the timer was rearmed on another CPU, reprogram
1244                         * the event device.
1245                         */
1246                        struct hrtimer_clock_base *base = timer->base;
1247
1248                        if (base->first == &timer->node &&
1249                            hrtimer_reprogram(timer, base)) {
1250                                /*
1251                                 * Timer is expired. Thus move it from tree to
1252                                 * pending list again.
1253                                 */
1254                                __remove_hrtimer(timer, base,
1255                                                 HRTIMER_STATE_PENDING, 0);
1256                                list_add_tail(&timer->cb_entry,
1257                                              &base->cpu_base->cb_pending);
1258                        }
1259                }
1260        }
1261        spin_unlock_irq(&cpu_base->lock);
1262}
1263
1264static void __run_hrtimer(struct hrtimer *timer)
1265{
1266        struct hrtimer_clock_base *base = timer->base;
1267        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1268        enum hrtimer_restart (*fn)(struct hrtimer *);
1269        int restart;
1270
1271        debug_hrtimer_deactivate(timer);
1272        __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1273        timer_stats_account_hrtimer(timer);
1274
1275        fn = timer->function;
1276        if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
1277            timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
1278                /*
1279                 * Used for scheduler timers, avoid lock inversion with
1280                 * rq->lock and tasklist_lock.
1281                 *
1282                 * These timers are required to deal with enqueue expiry
1283                 * themselves and are not allowed to migrate.
1284                 */
1285                spin_unlock(&cpu_base->lock);
1286                restart = fn(timer);
1287                spin_lock(&cpu_base->lock);
1288        } else
1289                restart = fn(timer);
1290
1291        /*
1292         * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
1293         * reprogramming of the event hardware. This happens at the end of this
1294         * function anyway.
1295         */
1296        if (restart != HRTIMER_NORESTART) {
1297                BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1298                enqueue_hrtimer(timer, base, 0);
1299        }
1300        timer->state &= ~HRTIMER_STATE_CALLBACK;
1301}
1302
1303#ifdef CONFIG_HIGH_RES_TIMERS
1304
1305/*
1306 * High resolution timer interrupt
1307 * Called with interrupts disabled
1308 */
1309void hrtimer_interrupt(struct clock_event_device *dev)
1310{
1311        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1312        struct hrtimer_clock_base *base;
1313        ktime_t expires_next, now;
1314        int i, raise = 0;
1315
1316        BUG_ON(!cpu_base->hres_active);
1317        cpu_base->nr_events++;
1318        dev->next_event.tv64 = KTIME_MAX;
1319
1320 retry:
1321        now = ktime_get();
1322
1323        expires_next.tv64 = KTIME_MAX;
1324
1325        base = cpu_base->clock_base;
1326
1327        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1328                ktime_t basenow;
1329                struct rb_node *node;
1330
1331                spin_lock(&cpu_base->lock);
1332
1333                basenow = ktime_add(now, base->offset);
1334
1335                while ((node = base->first)) {
1336                        struct hrtimer *timer;
1337
1338                        timer = rb_entry(node, struct hrtimer, node);
1339
1340                        /*
1341                         * The immediate goal for using the softexpires is
1342                         * minimizing wakeups, not running timers at the
1343                         * earliest interrupt after their soft expiration.
1344                         * This allows us to avoid using a Priority Search
1345                         * Tree, which can answer a stabbing querry for
1346                         * overlapping intervals and instead use the simple
1347                         * BST we already have.
1348                         * We don't add extra wakeups by delaying timers that
1349                         * are right-of a not yet expired timer, because that
1350                         * timer will have to trigger a wakeup anyway.
1351                         */
1352
1353                        if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
1354                                ktime_t expires;
1355
1356                                expires = ktime_sub(hrtimer_get_expires(timer),
1357                                                    base->offset);
1358                                if (expires.tv64 < expires_next.tv64)
1359                                        expires_next = expires;
1360                                break;
1361                        }
1362
1363                        /* Move softirq callbacks to the pending list */
1364                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1365                                __remove_hrtimer(timer, base,
1366                                                 HRTIMER_STATE_PENDING, 0);
1367                                list_add_tail(&timer->cb_entry,
1368                                              &base->cpu_base->cb_pending);
1369                                raise = 1;
1370                                continue;
1371                        }
1372
1373                        __run_hrtimer(timer);
1374                }
1375                spin_unlock(&cpu_base->lock);
1376                base++;
1377        }
1378
1379        cpu_base->expires_next = expires_next;
1380
1381        /* Reprogramming necessary ? */
1382        if (expires_next.tv64 != KTIME_MAX) {
1383                if (tick_program_event(expires_next, 0))
1384                        goto retry;
1385        }
1386
1387        /* Raise softirq ? */
1388        if (raise)
1389                raise_softirq(HRTIMER_SOFTIRQ);
1390}
1391
1392/**
1393 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1394 *
1395 * hrtimer_peek_ahead_timers will peek at the timer queue of
1396 * the current cpu and check if there are any timers for which
1397 * the soft expires time has passed. If any such timers exist,
1398 * they are run immediately and then removed from the timer queue.
1399 *
1400 */
1401void hrtimer_peek_ahead_timers(void)
1402{
1403        struct tick_device *td;
1404        unsigned long flags;
1405
1406        if (!hrtimer_hres_active())
1407                return;
1408
1409        local_irq_save(flags);
1410        td = &__get_cpu_var(tick_cpu_device);
1411        if (td && td->evtdev)
1412                hrtimer_interrupt(td->evtdev);
1413        local_irq_restore(flags);
1414}
1415
1416static void run_hrtimer_softirq(struct softirq_action *h)
1417{
1418        run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
1419}
1420
1421#endif  /* CONFIG_HIGH_RES_TIMERS */
1422
1423/*
1424 * Called from timer softirq every jiffy, expire hrtimers:
1425 *
1426 * For HRT its the fall back code to run the softirq in the timer
1427 * softirq context in case the hrtimer initialization failed or has
1428 * not been done yet.
1429 */
1430void hrtimer_run_pending(void)
1431{
1432        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1433
1434        if (hrtimer_hres_active())
1435                return;
1436
1437        /*
1438         * This _is_ ugly: We have to check in the softirq context,
1439         * whether we can switch to highres and / or nohz mode. The
1440         * clocksource switch happens in the timer interrupt with
1441         * xtime_lock held. Notification from there only sets the
1442         * check bit in the tick_oneshot code, otherwise we might
1443         * deadlock vs. xtime_lock.
1444         */
1445        if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1446                hrtimer_switch_to_hres();
1447
1448        run_hrtimer_pending(cpu_base);
1449}
1450
1451/*
1452 * Called from hardirq context every jiffy
1453 */
1454void hrtimer_run_queues(void)
1455{
1456        struct rb_node *node;
1457        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1458        struct hrtimer_clock_base *base;
1459        int index, gettime = 1;
1460
1461        if (hrtimer_hres_active())
1462                return;
1463
1464        for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1465                base = &cpu_base->clock_base[index];
1466
1467                if (!base->first)
1468                        continue;
1469
1470                if (gettime) {
1471                        hrtimer_get_softirq_time(cpu_base);
1472                        gettime = 0;
1473                }
1474
1475                spin_lock(&cpu_base->lock);
1476
1477                while ((node = base->first)) {
1478                        struct hrtimer *timer;
1479
1480                        timer = rb_entry(node, struct hrtimer, node);
1481                        if (base->softirq_time.tv64 <=
1482                                        hrtimer_get_expires_tv64(timer))
1483                                break;
1484
1485                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1486                                __remove_hrtimer(timer, base,
1487                                        HRTIMER_STATE_PENDING, 0);
1488                                list_add_tail(&timer->cb_entry,
1489                                        &base->cpu_base->cb_pending);
1490                                continue;
1491                        }
1492
1493                        __run_hrtimer(timer);
1494                }
1495                spin_unlock(&cpu_base->lock);
1496        }
1497}
1498
1499/*
1500 * Sleep related functions:
1501 */
1502static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1503{
1504        struct hrtimer_sleeper *t =
1505                container_of(timer, struct hrtimer_sleeper, timer);
1506        struct task_struct *task = t->task;
1507
1508        t->task = NULL;
1509        if (task)
1510                wake_up_process(task);
1511
1512        return HRTIMER_NORESTART;
1513}
1514
1515void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1516{
1517        sl->timer.function = hrtimer_wakeup;
1518        sl->task = task;
1519#ifdef CONFIG_HIGH_RES_TIMERS
1520        sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
1521#endif
1522}
1523
1524static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1525{
1526        hrtimer_init_sleeper(t, current);
1527
1528        do {
1529                set_current_state(TASK_INTERRUPTIBLE);
1530                hrtimer_start_expires(&t->timer, mode);
1531                if (!hrtimer_active(&t->timer))
1532                        t->task = NULL;
1533
1534                if (likely(t->task))
1535                        schedule();
1536
1537                hrtimer_cancel(&t->timer);
1538                mode = HRTIMER_MODE_ABS;
1539
1540        } while (t->task && !signal_pending(current));
1541
1542        __set_current_state(TASK_RUNNING);
1543
1544        return t->task == NULL;
1545}
1546
1547static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1548{
1549        struct timespec rmt;
1550        ktime_t rem;
1551
1552        rem = hrtimer_expires_remaining(timer);
1553        if (rem.tv64 <= 0)
1554                return 0;
1555        rmt = ktime_to_timespec(rem);
1556
1557        if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1558                return -EFAULT;
1559
1560        return 1;
1561}
1562
1563long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1564{
1565        struct hrtimer_sleeper t;
1566        struct timespec __user  *rmtp;
1567        int ret = 0;
1568
1569        hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
1570                                HRTIMER_MODE_ABS);
1571        hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1572
1573        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1574                goto out;
1575
1576        rmtp = restart->nanosleep.rmtp;
1577        if (rmtp) {
1578                ret = update_rmtp(&t.timer, rmtp);
1579                if (ret <= 0)
1580                        goto out;
1581        }
1582
1583        /* The other values in restart are already filled in */
1584        ret = -ERESTART_RESTARTBLOCK;
1585out:
1586        destroy_hrtimer_on_stack(&t.timer);
1587        return ret;
1588}
1589
1590long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1591                       const enum hrtimer_mode mode, const clockid_t clockid)
1592{
1593        struct restart_block *restart;
1594        struct hrtimer_sleeper t;
1595        int ret = 0;
1596        unsigned long slack;
1597
1598        slack = current->timer_slack_ns;
1599        if (rt_task(current))
1600                slack = 0;
1601
1602        hrtimer_init_on_stack(&t.timer, clockid, mode);
1603        hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
1604        if (do_nanosleep(&t, mode))
1605                goto out;
1606
1607        /* Absolute timers do not update the rmtp value and restart: */
1608        if (mode == HRTIMER_MODE_ABS) {
1609                ret = -ERESTARTNOHAND;
1610                goto out;
1611        }
1612
1613        if (rmtp) {
1614                ret = update_rmtp(&t.timer, rmtp);
1615                if (ret <= 0)
1616                        goto out;
1617        }
1618
1619        restart = &current_thread_info()->restart_block;
1620        restart->fn = hrtimer_nanosleep_restart;
1621        restart->nanosleep.index = t.timer.base->index;
1622        restart->nanosleep.rmtp = rmtp;
1623        restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1624
1625        ret = -ERESTART_RESTARTBLOCK;
1626out:
1627        destroy_hrtimer_on_stack(&t.timer);
1628        return ret;
1629}
1630
1631SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
1632                struct timespec __user *, rmtp)
1633{
1634        struct timespec tu;
1635
1636        if (copy_from_user(&tu, rqtp, sizeof(tu)))
1637                return -EFAULT;
1638
1639        if (!timespec_valid(&tu))
1640                return -EINVAL;
1641
1642        return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1643}
1644
1645/*
1646 * Functions related to boot-time initialization:
1647 */
1648static void __cpuinit init_hrtimers_cpu(int cpu)
1649{
1650        struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1651        int i;
1652
1653        spin_lock_init(&cpu_base->lock);
1654
1655        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1656                cpu_base->clock_base[i].cpu_base = cpu_base;
1657
1658        INIT_LIST_HEAD(&cpu_base->cb_pending);
1659        hrtimer_init_hres(cpu_base);
1660}
1661
1662#ifdef CONFIG_HOTPLUG_CPU
1663
1664static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1665                                struct hrtimer_clock_base *new_base, int dcpu)
1666{
1667        struct hrtimer *timer;
1668        struct rb_node *node;
1669        int raise = 0;
1670
1671        while ((node = rb_first(&old_base->active))) {
1672                timer = rb_entry(node, struct hrtimer, node);
1673                BUG_ON(hrtimer_callback_running(timer));
1674                debug_hrtimer_deactivate(timer);
1675
1676                /*
1677                 * Should not happen. Per CPU timers should be
1678                 * canceled _before_ the migration code is called
1679                 */
1680                if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
1681                        __remove_hrtimer(timer, old_base,
1682                                         HRTIMER_STATE_INACTIVE, 0);
1683                        WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
1684                             timer, timer->function, dcpu);
1685                        continue;
1686                }
1687
1688                /*
1689                 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
1690                 * timer could be seen as !active and just vanish away
1691                 * under us on another CPU
1692                 */
1693                __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
1694                timer->base = new_base;
1695                /*
1696                 * Enqueue the timer. Allow reprogramming of the event device
1697                 */
1698                enqueue_hrtimer(timer, new_base, 1);
1699
1700#ifdef CONFIG_HIGH_RES_TIMERS
1701                /*
1702                 * Happens with high res enabled when the timer was
1703                 * already expired and the callback mode is
1704                 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
1705                 * enqueue code does not move them to the soft irq
1706                 * pending list for performance/latency reasons, but
1707                 * in the migration state, we need to do that
1708                 * otherwise we end up with a stale timer.
1709                 */
1710                if (timer->state == HRTIMER_STATE_MIGRATE) {
1711                        timer->state = HRTIMER_STATE_PENDING;
1712                        list_add_tail(&timer->cb_entry,
1713                                      &new_base->cpu_base->cb_pending);
1714                        raise = 1;
1715                }
1716#endif
1717                /* Clear the migration state bit */
1718                timer->state &= ~HRTIMER_STATE_MIGRATE;
1719        }
1720        return raise;
1721}
1722
#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * Move all timers from the cb_pending list of @old_base to the one of
 * @new_base, re-pointing each timer at the clock base with the same
 * index on the new CPU base.
 *
 * Returns non-zero when at least one timer was moved.
 */
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
				   struct hrtimer_cpu_base *new_base)
{
	struct list_head *pending = &old_base->cb_pending;
	int moved = 0;

	while (!list_empty(pending)) {
		struct hrtimer *timer;

		timer = list_entry(pending->next, struct hrtimer, cb_entry);

		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
		timer->base = &new_base->clock_base[timer->base->index];
		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
		moved = 1;
	}

	return moved;
}
#else
/* Without high resolution timers this variant moves nothing */
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
				   struct hrtimer_cpu_base *new_base)
{
	return 0;
}
#endif
1748
1749static void migrate_hrtimers(int cpu)
1750{
1751        struct hrtimer_cpu_base *old_base, *new_base;
1752        int i, raise = 0;
1753
1754        BUG_ON(cpu_online(cpu));
1755        old_base = &per_cpu(hrtimer_bases, cpu);
1756        new_base = &get_cpu_var(hrtimer_bases);
1757
1758        tick_cancel_sched_timer(cpu);
1759        /*
1760         * The caller is globally serialized and nobody else
1761         * takes two locks at once, deadlock is not possible.
1762         */
1763        spin_lock_irq(&new_base->lock);
1764        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1765
1766        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1767                if (migrate_hrtimer_list(&old_base->clock_base[i],
1768                                         &new_base->clock_base[i], cpu))
1769                        raise = 1;
1770        }
1771
1772        if (migrate_hrtimer_pending(old_base, new_base))
1773                raise = 1;
1774
1775        spin_unlock(&old_base->lock);
1776        spin_unlock_irq(&new_base->lock);
1777        put_cpu_var(hrtimer_bases);
1778
1779        if (raise)
1780                hrtimer_raise_softirq();
1781}
1782#endif /* CONFIG_HOTPLUG_CPU */
1783
1784static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1785                                        unsigned long action, void *hcpu)
1786{
1787        unsigned int cpu = (long)hcpu;
1788
1789        switch (action) {
1790
1791        case CPU_UP_PREPARE:
1792        case CPU_UP_PREPARE_FROZEN:
1793                init_hrtimers_cpu(cpu);
1794                break;
1795
1796#ifdef CONFIG_HOTPLUG_CPU
1797        case CPU_DEAD:
1798        case CPU_DEAD_FROZEN:
1799                clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
1800                migrate_hrtimers(cpu);
1801                break;
1802#endif
1803
1804        default:
1805                break;
1806        }
1807
1808        return NOTIFY_OK;
1809}
1810
1811static struct notifier_block __cpuinitdata hrtimers_nb = {
1812        .notifier_call = hrtimer_cpu_notify,
1813};
1814
1815void __init hrtimers_init(void)
1816{
1817        hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1818                          (void *)(long)smp_processor_id());
1819        register_cpu_notifier(&hrtimers_nb);
1820#ifdef CONFIG_HIGH_RES_TIMERS
1821        open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
1822#endif
1823}
1824
1825/**
1826 * schedule_hrtimeout_range - sleep until timeout
1827 * @expires:    timeout value (ktime_t)
1828 * @delta:      slack in expires timeout (ktime_t)
1829 * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1830 *
1831 * Make the current task sleep until the given expiry time has
1832 * elapsed. The routine will return immediately unless
1833 * the current task state has been set (see set_current_state()).
1834 *
1835 * The @delta argument gives the kernel the freedom to schedule the
1836 * actual wakeup to a time that is both power and performance friendly.
1837 * The kernel give the normal best effort behavior for "@expires+@delta",
1838 * but may decide to fire the timer earlier, but no earlier than @expires.
1839 *
1840 * You can set the task state as follows -
1841 *
1842 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1843 * pass before the routine returns.
1844 *
1845 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1846 * delivered to the current task.
1847 *
1848 * The current task state is guaranteed to be TASK_RUNNING when this
1849 * routine returns.
1850 *
1851 * Returns 0 when the timer has expired otherwise -EINTR
1852 */
1853int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1854                               const enum hrtimer_mode mode)
1855{
1856        struct hrtimer_sleeper t;
1857
1858        /*
1859         * Optimize when a zero timeout value is given. It does not
1860         * matter whether this is an absolute or a relative time.
1861         */
1862        if (expires && !expires->tv64) {
1863                __set_current_state(TASK_RUNNING);
1864                return 0;
1865        }
1866
1867        /*
1868         * A NULL parameter means "inifinte"
1869         */
1870        if (!expires) {
1871                schedule();
1872                __set_current_state(TASK_RUNNING);
1873                return -EINTR;
1874        }
1875
1876        hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
1877        hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1878
1879        hrtimer_init_sleeper(&t, current);
1880
1881        hrtimer_start_expires(&t.timer, mode);
1882        if (!hrtimer_active(&t.timer))
1883                t.task = NULL;
1884
1885        if (likely(t.task))
1886                schedule();
1887
1888        hrtimer_cancel(&t.timer);
1889        destroy_hrtimer_on_stack(&t.timer);
1890
1891        __set_current_state(TASK_RUNNING);
1892
1893        return !t.task ? 0 : -EINTR;
1894}
1895EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1896
1897/**
1898 * schedule_hrtimeout - sleep until timeout
1899 * @expires:    timeout value (ktime_t)
1900 * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1901 *
1902 * Make the current task sleep until the given expiry time has
1903 * elapsed. The routine will return immediately unless
1904 * the current task state has been set (see set_current_state()).
1905 *
1906 * You can set the task state as follows -
1907 *
1908 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1909 * pass before the routine returns.
1910 *
1911 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1912 * delivered to the current task.
1913 *
1914 * The current task state is guaranteed to be TASK_RUNNING when this
1915 * routine returns.
1916 *
1917 * Returns 0 when the timer has expired otherwise -EINTR
1918 */
1919int __sched schedule_hrtimeout(ktime_t *expires,
1920                               const enum hrtimer_mode mode)
1921{
1922        return schedule_hrtimeout_range(expires, 0, mode);
1923}
1924EXPORT_SYMBOL_GPL(schedule_hrtimeout);
1925
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.