linux/kernel/hrtimer.c
<<
>>
Prefs
   1/*
   2 *  linux/kernel/hrtimer.c
   3 *
   4 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   5 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   6 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
   7 *
   8 *  High-resolution kernel timers
   9 *
  10 *  In contrast to the low-resolution timeout API implemented in
  11 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
  12 *  depending on system configuration and capabilities.
  13 *
  14 *  These timers are currently used for:
  15 *   - itimers
  16 *   - POSIX timers
  17 *   - nanosleep
  18 *   - precise in-kernel timing
  19 *
  20 *  Started by: Thomas Gleixner and Ingo Molnar
  21 *
  22 *  Credits:
  23 *      based on kernel/timer.c
  24 *
  25 *      Help, testing, suggestions, bugfixes, improvements were
  26 *      provided by:
  27 *
  28 *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
  29 *      et. al.
  30 *
  31 *  For licencing details see kernel-base/COPYING
  32 */
  33
  34#include <linux/cpu.h>
  35#include <linux/irq.h>
  36#include <linux/module.h>
  37#include <linux/percpu.h>
  38#include <linux/hrtimer.h>
  39#include <linux/notifier.h>
  40#include <linux/syscalls.h>
  41#include <linux/kallsyms.h>
  42#include <linux/interrupt.h>
  43#include <linux/tick.h>
  44#include <linux/seq_file.h>
  45#include <linux/err.h>
  46
  47#include <asm/uaccess.h>
  48
  49/**
  50 * ktime_get - get the monotonic time in ktime_t format
  51 *
  52 * returns the time in ktime_t format
  53 */
  54ktime_t ktime_get(void)
  55{
  56        struct timespec now;
  57
  58        ktime_get_ts(&now);
  59
  60        return timespec_to_ktime(now);
  61}
  62EXPORT_SYMBOL_GPL(ktime_get);
  63
  64/**
  65 * ktime_get_real - get the real (wall-) time in ktime_t format
  66 *
  67 * returns the time in ktime_t format
  68 */
  69ktime_t ktime_get_real(void)
  70{
  71        struct timespec now;
  72
  73        getnstimeofday(&now);
  74
  75        return timespec_to_ktime(now);
  76}
  77
  78EXPORT_SYMBOL_GPL(ktime_get_real);
  79
  80/*
  81 * The timer bases:
  82 *
  83 * Note: If we want to add new timer bases, we have to skip the two
  84 * clock ids captured by the cpu-timers. We do this by holding empty
  85 * entries rather than doing math adjustment of the clock ids.
  86 * This ensures that we capture erroneous accesses to these clock ids
  87 * rather than moving them into the range of valid clock id's.
  88 */
  89DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
  90{
  91
  92        .clock_base =
  93        {
  94                {
  95                        .index = CLOCK_REALTIME,
  96                        .get_time = &ktime_get_real,
  97                        .resolution = KTIME_LOW_RES,
  98                },
  99                {
 100                        .index = CLOCK_MONOTONIC,
 101                        .get_time = &ktime_get,
 102                        .resolution = KTIME_LOW_RES,
 103                },
 104        }
 105};
 106
 107/**
 108 * ktime_get_ts - get the monotonic clock in timespec format
 109 * @ts:         pointer to timespec variable
 110 *
 111 * The function calculates the monotonic clock from the realtime
 112 * clock and the wall_to_monotonic offset and stores the result
 113 * in normalized timespec format in the variable pointed to by @ts.
 114 */
 115void ktime_get_ts(struct timespec *ts)
 116{
 117        struct timespec tomono;
 118        unsigned long seq;
 119
 120        do {
 121                seq = read_seqbegin(&xtime_lock);
 122                getnstimeofday(ts);
 123                tomono = wall_to_monotonic;
 124
 125        } while (read_seqretry(&xtime_lock, seq));
 126
 127        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
 128                                ts->tv_nsec + tomono.tv_nsec);
 129}
 130EXPORT_SYMBOL_GPL(ktime_get_ts);
 131
 132/*
 133 * Get the coarse grained time at the softirq based on xtime and
 134 * wall_to_monotonic.
 135 */
 136static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 137{
 138        ktime_t xtim, tomono;
 139        struct timespec xts, tom;
 140        unsigned long seq;
 141
 142        do {
 143                seq = read_seqbegin(&xtime_lock);
 144                xts = current_kernel_time();
 145                tom = wall_to_monotonic;
 146        } while (read_seqretry(&xtime_lock, seq));
 147
 148        xtim = timespec_to_ktime(xts);
 149        tomono = timespec_to_ktime(tom);
 150        base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 151        base->clock_base[CLOCK_MONOTONIC].softirq_time =
 152                ktime_add(xtim, tomono);
 153}
 154
 155/*
 156 * Helper function to check, whether the timer is running the callback
 157 * function
 158 */
 159static inline int hrtimer_callback_running(struct hrtimer *timer)
 160{
 161        return timer->state & HRTIMER_STATE_CALLBACK;
 162}
 163
 164/*
 165 * Functions and macros which are different for UP/SMP systems are kept in a
 166 * single place
 167 */
 168#ifdef CONFIG_SMP
 169
 170/*
 171 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 172 * means that all timers which are tied to this base via timer->base are
 173 * locked, and the base itself is locked too.
 174 *
 175 * So __run_timers/migrate_timers can safely modify all timers which could
 176 * be found on the lists/queues.
 177 *
 178 * When the timer's base is locked, and the timer removed from list, it is
 179 * possible to set timer->base = NULL and drop the lock: the timer remains
 180 * locked.
 181 */
 182static
 183struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 184                                             unsigned long *flags)
 185{
 186        struct hrtimer_clock_base *base;
 187
 188        for (;;) {
 189                base = timer->base;
 190                if (likely(base != NULL)) {
 191                        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 192                        if (likely(base == timer->base))
 193                                return base;
 194                        /* The timer has migrated to another CPU: */
 195                        spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 196                }
 197                cpu_relax();
 198        }
 199}
 200
 201/*
 202 * Switch the timer base to the current CPU when possible.
 203 */
 204static inline struct hrtimer_clock_base *
 205switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 206{
 207        struct hrtimer_clock_base *new_base;
 208        struct hrtimer_cpu_base *new_cpu_base;
 209
 210        new_cpu_base = &__get_cpu_var(hrtimer_bases);
 211        new_base = &new_cpu_base->clock_base[base->index];
 212
 213        if (base != new_base) {
 214                /*
 215                 * We are trying to schedule the timer on the local CPU.
 216                 * However we can't change timer's base while it is running,
 217                 * so we keep it on the same CPU. No hassle vs. reprogramming
 218                 * the event source in the high resolution case. The softirq
 219                 * code will take care of this when the timer function has
 220                 * completed. There is no conflict as we hold the lock until
 221                 * the timer is enqueued.
 222                 */
 223                if (unlikely(hrtimer_callback_running(timer)))
 224                        return base;
 225
 226                /* See the comment in lock_timer_base() */
 227                timer->base = NULL;
 228                spin_unlock(&base->cpu_base->lock);
 229                spin_lock(&new_base->cpu_base->lock);
 230                timer->base = new_base;
 231        }
 232        return new_base;
 233}
 234
 235#else /* CONFIG_SMP */
 236
 237static inline struct hrtimer_clock_base *
 238lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 239{
 240        struct hrtimer_clock_base *base = timer->base;
 241
 242        spin_lock_irqsave(&base->cpu_base->lock, *flags);
 243
 244        return base;
 245}
 246
 247# define switch_hrtimer_base(t, b)      (b)
 248
 249#endif  /* !CONFIG_SMP */
 250
 251/*
 252 * Functions for the union type storage format of ktime_t which are
 253 * too large for inlining:
 254 */
 255#if BITS_PER_LONG < 64
 256# ifndef CONFIG_KTIME_SCALAR
 257/**
 258 * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
 259 * @kt:         addend
 260 * @nsec:       the scalar nsec value to add
 261 *
 262 * Returns the sum of kt and nsec in ktime_t format
 263 */
 264ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 265{
 266        ktime_t tmp;
 267
 268        if (likely(nsec < NSEC_PER_SEC)) {
 269                tmp.tv64 = nsec;
 270        } else {
 271                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 272
 273                tmp = ktime_set((long)nsec, rem);
 274        }
 275
 276        return ktime_add(kt, tmp);
 277}
 278
 279EXPORT_SYMBOL_GPL(ktime_add_ns);
 280
 281/**
 282 * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
 283 * @kt:         minuend
 284 * @nsec:       the scalar nsec value to subtract
 285 *
 286 * Returns the subtraction of @nsec from @kt in ktime_t format
 287 */
 288ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
 289{
 290        ktime_t tmp;
 291
 292        if (likely(nsec < NSEC_PER_SEC)) {
 293                tmp.tv64 = nsec;
 294        } else {
 295                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 296
 297                tmp = ktime_set((long)nsec, rem);
 298        }
 299
 300        return ktime_sub(kt, tmp);
 301}
 302
 303EXPORT_SYMBOL_GPL(ktime_sub_ns);
 304# endif /* !CONFIG_KTIME_SCALAR */
 305
 306/*
 307 * Divide a ktime value by a nanosecond value
 308 */
 309u64 ktime_divns(const ktime_t kt, s64 div)
 310{
 311        u64 dclc, inc, dns;
 312        int sft = 0;
 313
 314        dclc = dns = ktime_to_ns(kt);
 315        inc = div;
 316        /* Make sure the divisor is less than 2^32: */
 317        while (div >> 32) {
 318                sft++;
 319                div >>= 1;
 320        }
 321        dclc >>= sft;
 322        do_div(dclc, (unsigned long) div);
 323
 324        return dclc;
 325}
  326#endif /* BITS_PER_LONG < 64 */
 327
 328/*
 329 * Add two ktime values and do a safety check for overflow:
 330 */
 331ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 332{
 333        ktime_t res = ktime_add(lhs, rhs);
 334
 335        /*
 336         * We use KTIME_SEC_MAX here, the maximum timeout which we can
 337         * return to user space in a timespec:
 338         */
 339        if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
 340                res = ktime_set(KTIME_SEC_MAX, 0);
 341
 342        return res;
 343}
 344
 345/*
 346 * Check, whether the timer is on the callback pending list
 347 */
 348static inline int hrtimer_cb_pending(const struct hrtimer *timer)
 349{
 350        return timer->state & HRTIMER_STATE_PENDING;
 351}
 352
 353/*
 354 * Remove a timer from the callback pending list
 355 */
 356static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
 357{
 358        list_del_init(&timer->cb_entry);
 359}
 360
 361/* High resolution timer related functions */
 362#ifdef CONFIG_HIGH_RES_TIMERS
 363
 364/*
 365 * High resolution timer enabled ?
 366 */
 367static int hrtimer_hres_enabled __read_mostly  = 1;
 368
 369/*
 370 * Enable / Disable high resolution mode
 371 */
 372static int __init setup_hrtimer_hres(char *str)
 373{
 374        if (!strcmp(str, "off"))
 375                hrtimer_hres_enabled = 0;
 376        else if (!strcmp(str, "on"))
 377                hrtimer_hres_enabled = 1;
 378        else
 379                return 0;
 380        return 1;
 381}
 382
 383__setup("highres=", setup_hrtimer_hres);
 384
 385/*
 386 * hrtimer_high_res_enabled - query, if the highres mode is enabled
 387 */
 388static inline int hrtimer_is_hres_enabled(void)
 389{
 390        return hrtimer_hres_enabled;
 391}
 392
 393/*
 394 * Is the high resolution mode active ?
 395 */
 396static inline int hrtimer_hres_active(void)
 397{
 398        return __get_cpu_var(hrtimer_bases).hres_active;
 399}
 400
 401/*
 402 * Reprogram the event source with checking both queues for the
 403 * next event
 404 * Called with interrupts disabled and base->lock held
 405 */
 406static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 407{
 408        int i;
 409        struct hrtimer_clock_base *base = cpu_base->clock_base;
 410        ktime_t expires;
 411
 412        cpu_base->expires_next.tv64 = KTIME_MAX;
 413
 414        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 415                struct hrtimer *timer;
 416
 417                if (!base->first)
 418                        continue;
 419                timer = rb_entry(base->first, struct hrtimer, node);
 420                expires = ktime_sub(timer->expires, base->offset);
 421                if (expires.tv64 < cpu_base->expires_next.tv64)
 422                        cpu_base->expires_next = expires;
 423        }
 424
 425        if (cpu_base->expires_next.tv64 != KTIME_MAX)
 426                tick_program_event(cpu_base->expires_next, 1);
 427}
 428
 429/*
 430 * Shared reprogramming for clock_realtime and clock_monotonic
 431 *
 432 * When a timer is enqueued and expires earlier than the already enqueued
 433 * timers, we have to check, whether it expires earlier than the timer for
 434 * which the clock event device was armed.
 435 *
 436 * Called with interrupts disabled and base->cpu_base.lock held
 437 */
 438static int hrtimer_reprogram(struct hrtimer *timer,
 439                             struct hrtimer_clock_base *base)
 440{
 441        ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
 442        ktime_t expires = ktime_sub(timer->expires, base->offset);
 443        int res;
 444
 445        WARN_ON_ONCE(timer->expires.tv64 < 0);
 446
 447        /*
 448         * When the callback is running, we do not reprogram the clock event
 449         * device. The timer callback is either running on a different CPU or
 450         * the callback is executed in the hrtimer_interrupt context. The
 451         * reprogramming is handled either by the softirq, which called the
 452         * callback or at the end of the hrtimer_interrupt.
 453         */
 454        if (hrtimer_callback_running(timer))
 455                return 0;
 456
 457        /*
 458         * CLOCK_REALTIME timer might be requested with an absolute
 459         * expiry time which is less than base->offset. Nothing wrong
 460         * about that, just avoid to call into the tick code, which
 461         * has now objections against negative expiry values.
 462         */
 463        if (expires.tv64 < 0)
 464                return -ETIME;
 465
 466        if (expires.tv64 >= expires_next->tv64)
 467                return 0;
 468
 469        /*
 470         * Clockevents returns -ETIME, when the event was in the past.
 471         */
 472        res = tick_program_event(expires, 0);
 473        if (!IS_ERR_VALUE(res))
 474                *expires_next = expires;
 475        return res;
 476}
 477
 478
 479/*
 480 * Retrigger next event is called after clock was set
 481 *
 482 * Called with interrupts disabled via on_each_cpu()
 483 */
 484static void retrigger_next_event(void *arg)
 485{
 486        struct hrtimer_cpu_base *base;
 487        struct timespec realtime_offset;
 488        unsigned long seq;
 489
 490        if (!hrtimer_hres_active())
 491                return;
 492
 493        do {
 494                seq = read_seqbegin(&xtime_lock);
 495                set_normalized_timespec(&realtime_offset,
 496                                        -wall_to_monotonic.tv_sec,
 497                                        -wall_to_monotonic.tv_nsec);
 498        } while (read_seqretry(&xtime_lock, seq));
 499
 500        base = &__get_cpu_var(hrtimer_bases);
 501
 502        /* Adjust CLOCK_REALTIME offset */
 503        spin_lock(&base->lock);
 504        base->clock_base[CLOCK_REALTIME].offset =
 505                timespec_to_ktime(realtime_offset);
 506
 507        hrtimer_force_reprogram(base);
 508        spin_unlock(&base->lock);
 509}
 510
 511/*
 512 * Clock realtime was set
 513 *
 514 * Change the offset of the realtime clock vs. the monotonic
 515 * clock.
 516 *
 517 * We might have to reprogram the high resolution timer interrupt. On
 518 * SMP we call the architecture specific code to retrigger _all_ high
 519 * resolution timer interrupts. On UP we just disable interrupts and
 520 * call the high resolution interrupt code.
 521 */
 522void clock_was_set(void)
 523{
 524        /* Retrigger the CPU local events everywhere */
 525        on_each_cpu(retrigger_next_event, NULL, 0, 1);
 526}
 527
 528/*
 529 * During resume we might have to reprogram the high resolution timer
 530 * interrupt (on the local CPU):
 531 */
 532void hres_timers_resume(void)
 533{
 534        WARN_ON_ONCE(num_online_cpus() > 1);
 535
 536        /* Retrigger the CPU local events: */
 537        retrigger_next_event(NULL);
 538}
 539
 540/*
 541 * Initialize the high resolution related parts of cpu_base
 542 */
 543static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
 544{
 545        base->expires_next.tv64 = KTIME_MAX;
 546        base->hres_active = 0;
 547}
 548
 549/*
 550 * Initialize the high resolution related parts of a hrtimer
 551 */
 552static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 553{
 554}
 555
 556/*
 557 * When High resolution timers are active, try to reprogram. Note, that in case
 558 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
 559 * check happens. The timer gets enqueued into the rbtree. The reprogramming
 560 * and expiry check is done in the hrtimer_interrupt or in the softirq.
 561 */
 562static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 563                                            struct hrtimer_clock_base *base)
 564{
 565        if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 566
 567                /* Timer is expired, act upon the callback mode */
 568                switch(timer->cb_mode) {
 569                case HRTIMER_CB_IRQSAFE_NO_RESTART:
 570                        /*
 571                         * We can call the callback from here. No restart
 572                         * happens, so no danger of recursion
 573                         */
 574                        BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
 575                        return 1;
 576                case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
 577                        /*
 578                         * This is solely for the sched tick emulation with
 579                         * dynamic tick support to ensure that we do not
 580                         * restart the tick right on the edge and end up with
 581                         * the tick timer in the softirq ! The calling site
 582                         * takes care of this.
 583                         */
 584                        return 1;
 585                case HRTIMER_CB_IRQSAFE:
 586                case HRTIMER_CB_SOFTIRQ:
 587                        /*
 588                         * Move everything else into the softirq pending list !
 589                         */
 590                        list_add_tail(&timer->cb_entry,
 591                                      &base->cpu_base->cb_pending);
 592                        timer->state = HRTIMER_STATE_PENDING;
 593                        return 1;
 594                default:
 595                        BUG();
 596                }
 597        }
 598        return 0;
 599}
 600
 601/*
 602 * Switch to high resolution mode
 603 */
 604static int hrtimer_switch_to_hres(void)
 605{
 606        int cpu = smp_processor_id();
 607        struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 608        unsigned long flags;
 609
 610        if (base->hres_active)
 611                return 1;
 612
 613        local_irq_save(flags);
 614
 615        if (tick_init_highres()) {
 616                local_irq_restore(flags);
 617                printk(KERN_WARNING "Could not switch to high resolution "
 618                                    "mode on CPU %d\n", cpu);
 619                return 0;
 620        }
 621        base->hres_active = 1;
 622        base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
 623        base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
 624
 625        tick_setup_sched_timer();
 626
 627        /* "Retrigger" the interrupt to get things going */
 628        retrigger_next_event(NULL);
 629        local_irq_restore(flags);
 630        printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
 631               smp_processor_id());
 632        return 1;
 633}
 634
 635static inline void hrtimer_raise_softirq(void)
 636{
 637        raise_softirq(HRTIMER_SOFTIRQ);
 638}
 639
 640#else
 641
 642static inline int hrtimer_hres_active(void) { return 0; }
 643static inline int hrtimer_is_hres_enabled(void) { return 0; }
 644static inline int hrtimer_switch_to_hres(void) { return 0; }
 645static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 646static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 647                                            struct hrtimer_clock_base *base)
 648{
 649        return 0;
 650}
 651static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 652static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 653static inline int hrtimer_reprogram(struct hrtimer *timer,
 654                                    struct hrtimer_clock_base *base)
 655{
 656        return 0;
 657}
 658static inline void hrtimer_raise_softirq(void) { }
 659
 660#endif /* CONFIG_HIGH_RES_TIMERS */
 661
 662#ifdef CONFIG_TIMER_STATS
 663void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 664{
 665        if (timer->start_site)
 666                return;
 667
 668        timer->start_site = addr;
 669        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 670        timer->start_pid = current->pid;
 671}
 672#endif
 673
 674/*
 675 * Counterpart to lock_hrtimer_base above:
 676 */
 677static inline
 678void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 679{
 680        spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 681}
 682
 683/**
 684 * hrtimer_forward - forward the timer expiry
 685 * @timer:      hrtimer to forward
 686 * @now:        forward past this time
 687 * @interval:   the interval to forward
 688 *
 689 * Forward the timer expiry so it will expire in the future.
 690 * Returns the number of overruns.
 691 */
 692u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 693{
 694        u64 orun = 1;
 695        ktime_t delta;
 696
 697        delta = ktime_sub(now, timer->expires);
 698
 699        if (delta.tv64 < 0)
 700                return 0;
 701
 702        if (interval.tv64 < timer->base->resolution.tv64)
 703                interval.tv64 = timer->base->resolution.tv64;
 704
 705        if (unlikely(delta.tv64 >= interval.tv64)) {
 706                s64 incr = ktime_to_ns(interval);
 707
 708                orun = ktime_divns(delta, incr);
 709                timer->expires = ktime_add_ns(timer->expires, incr * orun);
 710                if (timer->expires.tv64 > now.tv64)
 711                        return orun;
 712                /*
 713                 * This (and the ktime_add() below) is the
 714                 * correction for exact:
 715                 */
 716                orun++;
 717        }
 718        timer->expires = ktime_add_safe(timer->expires, interval);
 719
 720        return orun;
 721}
 722EXPORT_SYMBOL_GPL(hrtimer_forward);
 723
 724/*
 725 * enqueue_hrtimer - internal function to (re)start a timer
 726 *
 727 * The timer is inserted in expiry order. Insertion into the
 728 * red black tree is O(log(n)). Must hold the base lock.
 729 */
 730static void enqueue_hrtimer(struct hrtimer *timer,
 731                            struct hrtimer_clock_base *base, int reprogram)
 732{
 733        struct rb_node **link = &base->active.rb_node;
 734        struct rb_node *parent = NULL;
 735        struct hrtimer *entry;
 736        int leftmost = 1;
 737
 738        /*
 739         * Find the right place in the rbtree:
 740         */
 741        while (*link) {
 742                parent = *link;
 743                entry = rb_entry(parent, struct hrtimer, node);
 744                /*
 745                 * We dont care about collisions. Nodes with
 746                 * the same expiry time stay together.
 747                 */
 748                if (timer->expires.tv64 < entry->expires.tv64) {
 749                        link = &(*link)->rb_left;
 750                } else {
 751                        link = &(*link)->rb_right;
 752                        leftmost = 0;
 753                }
 754        }
 755
 756        /*
 757         * Insert the timer to the rbtree and check whether it
 758         * replaces the first pending timer
 759         */
 760        if (leftmost) {
 761                /*
 762                 * Reprogram the clock event device. When the timer is already
 763                 * expired hrtimer_enqueue_reprogram has either called the
 764                 * callback or added it to the pending list and raised the
 765                 * softirq.
 766                 *
 767                 * This is a NOP for !HIGHRES
 768                 */
 769                if (reprogram && hrtimer_enqueue_reprogram(timer, base))
 770                        return;
 771
 772                base->first = &timer->node;
 773        }
 774
 775        rb_link_node(&timer->node, parent, link);
 776        rb_insert_color(&timer->node, &base->active);
 777        /*
 778         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
 779         * state of a possibly running callback.
 780         */
 781        timer->state |= HRTIMER_STATE_ENQUEUED;
 782}
 783
 784/*
 785 * __remove_hrtimer - internal function to remove a timer
 786 *
 787 * Caller must hold the base lock.
 788 *
 789 * High resolution timer mode reprograms the clock event device when the
 790 * timer is the one which expires next. The caller can disable this by setting
 791 * reprogram to zero. This is useful, when the context does a reprogramming
 792 * anyway (e.g. timer interrupt)
 793 */
 794static void __remove_hrtimer(struct hrtimer *timer,
 795                             struct hrtimer_clock_base *base,
 796                             unsigned long newstate, int reprogram)
 797{
 798        /* High res. callback list. NOP for !HIGHRES */
 799        if (hrtimer_cb_pending(timer))
 800                hrtimer_remove_cb_pending(timer);
 801        else {
 802                /*
 803                 * Remove the timer from the rbtree and replace the
 804                 * first entry pointer if necessary.
 805                 */
 806                if (base->first == &timer->node) {
 807                        base->first = rb_next(&timer->node);
 808                        /* Reprogram the clock event device. if enabled */
 809                        if (reprogram && hrtimer_hres_active())
 810                                hrtimer_force_reprogram(base->cpu_base);
 811                }
 812                rb_erase(&timer->node, &base->active);
 813        }
 814        timer->state = newstate;
 815}
 816
 817/*
 818 * remove hrtimer, called with base lock held
 819 */
 820static inline int
 821remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 822{
 823        if (hrtimer_is_queued(timer)) {
 824                int reprogram;
 825
 826                /*
 827                 * Remove the timer and force reprogramming when high
 828                 * resolution mode is active and the timer is on the current
 829                 * CPU. If we remove a timer on another CPU, reprogramming is
 830                 * skipped. The interrupt event on this CPU is fired and
 831                 * reprogramming happens in the interrupt handler. This is a
 832                 * rare case and less expensive than a smp call.
 833                 */
 834                timer_stats_hrtimer_clear_start_info(timer);
 835                reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 836                __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
 837                                 reprogram);
 838                return 1;
 839        }
 840        return 0;
 841}
 842
 843/**
 844 * hrtimer_start - (re)start an relative timer on the current CPU
 845 * @timer:      the timer to be added
 846 * @tim:        expiry time
 847 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 848 *
 849 * Returns:
 850 *  0 on success
 851 *  1 when the timer was active
 852 */
 853int
 854hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 855{
 856        struct hrtimer_clock_base *base, *new_base;
 857        unsigned long flags;
 858        int ret, raise;
 859
 860        base = lock_hrtimer_base(timer, &flags);
 861
 862        /* Remove an active timer from the queue: */
 863        ret = remove_hrtimer(timer, base);
 864
 865        /* Switch the timer base, if necessary: */
 866        new_base = switch_hrtimer_base(timer, base);
 867
 868        if (mode == HRTIMER_MODE_REL) {
 869                tim = ktime_add_safe(tim, new_base->get_time());
 870                /*
 871                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 872                 * to signal that they simply return xtime in
 873                 * do_gettimeoffset(). In this case we want to round up by
 874                 * resolution when starting a relative timer, to avoid short
 875                 * timeouts. This will go away with the GTOD framework.
 876                 */
 877#ifdef CONFIG_TIME_LOW_RES
 878                tim = ktime_add_safe(tim, base->resolution);
 879#endif
 880        }
 881        timer->expires = tim;
 882
 883        timer_stats_hrtimer_set_start_info(timer);
 884
 885        /*
 886         * Only allow reprogramming if the new base is on this CPU.
 887         * (it might still be on another CPU if the timer was pending)
 888         */
 889        enqueue_hrtimer(timer, new_base,
 890                        new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 891
 892        /*
 893         * The timer may be expired and moved to the cb_pending
 894         * list. We can not raise the softirq with base lock held due
 895         * to a possible deadlock with runqueue lock.
 896         */
 897        raise = timer->state == HRTIMER_STATE_PENDING;
 898
 899        unlock_hrtimer_base(timer, &flags);
 900
 901        if (raise)
 902                hrtimer_raise_softirq();
 903
 904        return ret;
 905}
 906EXPORT_SYMBOL_GPL(hrtimer_start);
 907
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
  * @timer:	hrtimer to stop
  *
  * Takes the timer's base lock, removes the timer unless its callback
  * is running at that moment, and drops the lock again.
  *
  * Returns:
  *  0 when the timer was not active
  *  1 when the timer was active
  * -1 when the timer is currently executing the callback function and
  *    cannot be stopped
  */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	struct hrtimer_clock_base *locked_base;
	unsigned long irq_flags;
	int status;

	locked_base = lock_hrtimer_base(timer, &irq_flags);

	if (hrtimer_callback_running(timer))
		status = -1;	/* callback in flight: caller has to retry */
	else
		status = remove_hrtimer(timer, locked_base);

	unlock_hrtimer_base(timer, &irq_flags);

	return status;
}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 935
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
  *
  * Keeps calling hrtimer_try_to_cancel() until it no longer reports a
  * running callback (negative result), relaxing the CPU between attempts.
  *
  * Returns:
  *  0 when the timer was not active
  *  1 when the timer was active
  */
int hrtimer_cancel(struct hrtimer *timer)
{
	int ret;

	while ((ret = hrtimer_try_to_cancel(timer)) < 0)
		cpu_relax();

	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
 955
 956/**
 957 * hrtimer_get_remaining - get remaining time for the timer
 958 * @timer:      the timer to read
 959 */
 960ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 961{
 962        struct hrtimer_clock_base *base;
 963        unsigned long flags;
 964        ktime_t rem;
 965
 966        base = lock_hrtimer_base(timer, &flags);
 967        rem = ktime_sub(timer->expires, base->get_time());
 968        unlock_hrtimer_base(timer, &flags);
 969
 970        return rem;
 971}
 972EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 973
 974#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
 975/**
 976 * hrtimer_get_next_event - get the time until next expiry event
 977 *
 978 * Returns the delta to the next expiry event or KTIME_MAX if no timer
 979 * is pending.
 980 */
 981ktime_t hrtimer_get_next_event(void)
 982{
 983        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 984        struct hrtimer_clock_base *base = cpu_base->clock_base;
 985        ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
 986        unsigned long flags;
 987        int i;
 988
 989        spin_lock_irqsave(&cpu_base->lock, flags);
 990
 991        if (!hrtimer_hres_active()) {
 992                for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 993                        struct hrtimer *timer;
 994
 995                        if (!base->first)
 996                                continue;
 997
 998                        timer = rb_entry(base->first, struct hrtimer, node);
 999                        delta.tv64 = timer->expires.tv64;
1000                        delta = ktime_sub(delta, base->get_time());
1001                        if (delta.tv64 < mindelta.tv64)
1002                                mindelta.tv64 = delta.tv64;
1003                }
1004        }
1005
1006        spin_unlock_irqrestore(&cpu_base->lock, flags);
1007
1008        if (mindelta.tv64 < 0)
1009                mindelta.tv64 = 0;
1010        return mindelta;
1011}
1012#endif
1013
1014/**
1015 * hrtimer_init - initialize a timer to the given clock
1016 * @timer:      the timer to be initialized
1017 * @clock_id:   the clock to be used
1018 * @mode:       timer mode abs/rel
1019 */
1020void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1021                  enum hrtimer_mode mode)
1022{
1023        struct hrtimer_cpu_base *cpu_base;
1024
1025        memset(timer, 0, sizeof(struct hrtimer));
1026
1027        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1028
1029        if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1030                clock_id = CLOCK_MONOTONIC;
1031
1032        timer->base = &cpu_base->clock_base[clock_id];
1033        INIT_LIST_HEAD(&timer->cb_entry);
1034        hrtimer_init_timer_hres(timer);
1035
1036#ifdef CONFIG_TIMER_STATS
1037        timer->start_site = NULL;
1038        timer->start_pid = -1;
1039        memset(timer->start_comm, 0, TASK_COMM_LEN);
1040#endif
1041}
1042EXPORT_SYMBOL_GPL(hrtimer_init);
1043
1044/**
1045 * hrtimer_get_res - get the timer resolution for a clock
1046 * @which_clock: which clock to query
1047 * @tp:          pointer to timespec variable to store the resolution
1048 *
1049 * Store the resolution of the clock selected by @which_clock in the
1050 * variable pointed to by @tp.
1051 */
1052int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1053{
1054        struct hrtimer_cpu_base *cpu_base;
1055
1056        cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1057        *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
1058
1059        return 0;
1060}
1061EXPORT_SYMBOL_GPL(hrtimer_get_res);
1062
1063static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1064{
1065        spin_lock_irq(&cpu_base->lock);
1066
1067        while (!list_empty(&cpu_base->cb_pending)) {
1068                enum hrtimer_restart (*fn)(struct hrtimer *);
1069                struct hrtimer *timer;
1070                int restart;
1071
1072                timer = list_entry(cpu_base->cb_pending.next,
1073                                   struct hrtimer, cb_entry);
1074
1075                timer_stats_account_hrtimer(timer);
1076
1077                fn = timer->function;
1078                __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1079                spin_unlock_irq(&cpu_base->lock);
1080
1081                restart = fn(timer);
1082
1083                spin_lock_irq(&cpu_base->lock);
1084
1085                timer->state &= ~HRTIMER_STATE_CALLBACK;
1086                if (restart == HRTIMER_RESTART) {
1087                        BUG_ON(hrtimer_active(timer));
1088                        /*
1089                         * Enqueue the timer, allow reprogramming of the event
1090                         * device
1091                         */
1092                        enqueue_hrtimer(timer, timer->base, 1);
1093                } else if (hrtimer_active(timer)) {
1094                        /*
1095                         * If the timer was rearmed on another CPU, reprogram
1096                         * the event device.
1097                         */
1098                        struct hrtimer_clock_base *base = timer->base;
1099
1100                        if (base->first == &timer->node &&
1101                            hrtimer_reprogram(timer, base)) {
1102                                /*
1103                                 * Timer is expired. Thus move it from tree to
1104                                 * pending list again.
1105                                 */
1106                                __remove_hrtimer(timer, base,
1107                                                 HRTIMER_STATE_PENDING, 0);
1108                                list_add_tail(&timer->cb_entry,
1109                                              &base->cpu_base->cb_pending);
1110                        }
1111                }
1112        }
1113        spin_unlock_irq(&cpu_base->lock);
1114}
1115
1116static void __run_hrtimer(struct hrtimer *timer)
1117{
1118        struct hrtimer_clock_base *base = timer->base;
1119        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1120        enum hrtimer_restart (*fn)(struct hrtimer *);
1121        int restart;
1122
1123        __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1124        timer_stats_account_hrtimer(timer);
1125
1126        fn = timer->function;
1127        if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
1128                /*
1129                 * Used for scheduler timers, avoid lock inversion with
1130                 * rq->lock and tasklist_lock.
1131                 *
1132                 * These timers are required to deal with enqueue expiry
1133                 * themselves and are not allowed to migrate.
1134                 */
1135                spin_unlock(&cpu_base->lock);
1136                restart = fn(timer);
1137                spin_lock(&cpu_base->lock);
1138        } else
1139                restart = fn(timer);
1140
1141        /*
1142         * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
1143         * reprogramming of the event hardware. This happens at the end of this
1144         * function anyway.
1145         */
1146        if (restart != HRTIMER_NORESTART) {
1147                BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1148                enqueue_hrtimer(timer, base, 0);
1149        }
1150        timer->state &= ~HRTIMER_STATE_CALLBACK;
1151}
1152
1153#ifdef CONFIG_HIGH_RES_TIMERS
1154
1155/*
1156 * High resolution timer interrupt
1157 * Called with interrupts disabled
1158 */
1159void hrtimer_interrupt(struct clock_event_device *dev)
1160{
1161        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1162        struct hrtimer_clock_base *base;
1163        ktime_t expires_next, now;
1164        int i, raise = 0;
1165
1166        BUG_ON(!cpu_base->hres_active);
1167        cpu_base->nr_events++;
1168        dev->next_event.tv64 = KTIME_MAX;
1169
1170 retry:
1171        now = ktime_get();
1172
1173        expires_next.tv64 = KTIME_MAX;
1174
1175        base = cpu_base->clock_base;
1176
1177        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1178                ktime_t basenow;
1179                struct rb_node *node;
1180
1181                spin_lock(&cpu_base->lock);
1182
1183                basenow = ktime_add(now, base->offset);
1184
1185                while ((node = base->first)) {
1186                        struct hrtimer *timer;
1187
1188                        timer = rb_entry(node, struct hrtimer, node);
1189
1190                        if (basenow.tv64 < timer->expires.tv64) {
1191                                ktime_t expires;
1192
1193                                expires = ktime_sub(timer->expires,
1194                                                    base->offset);
1195                                if (expires.tv64 < expires_next.tv64)
1196                                        expires_next = expires;
1197                                break;
1198                        }
1199
1200                        /* Move softirq callbacks to the pending list */
1201                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1202                                __remove_hrtimer(timer, base,
1203                                                 HRTIMER_STATE_PENDING, 0);
1204                                list_add_tail(&timer->cb_entry,
1205                                              &base->cpu_base->cb_pending);
1206                                raise = 1;
1207                                continue;
1208                        }
1209
1210                        __run_hrtimer(timer);
1211                }
1212                spin_unlock(&cpu_base->lock);
1213                base++;
1214        }
1215
1216        cpu_base->expires_next = expires_next;
1217
1218        /* Reprogramming necessary ? */
1219        if (expires_next.tv64 != KTIME_MAX) {
1220                if (tick_program_event(expires_next, 0))
1221                        goto retry;
1222        }
1223
1224        /* Raise softirq ? */
1225        if (raise)
1226                raise_softirq(HRTIMER_SOFTIRQ);
1227}
1228
1229static void run_hrtimer_softirq(struct softirq_action *h)
1230{
1231        run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
1232}
1233
1234#endif  /* CONFIG_HIGH_RES_TIMERS */
1235
1236/*
1237 * Called from timer softirq every jiffy, expire hrtimers:
1238 *
1239 * For HRT its the fall back code to run the softirq in the timer
1240 * softirq context in case the hrtimer initialization failed or has
1241 * not been done yet.
1242 */
1243void hrtimer_run_pending(void)
1244{
1245        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1246
1247        if (hrtimer_hres_active())
1248                return;
1249
1250        /*
1251         * This _is_ ugly: We have to check in the softirq context,
1252         * whether we can switch to highres and / or nohz mode. The
1253         * clocksource switch happens in the timer interrupt with
1254         * xtime_lock held. Notification from there only sets the
1255         * check bit in the tick_oneshot code, otherwise we might
1256         * deadlock vs. xtime_lock.
1257         */
1258        if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1259                hrtimer_switch_to_hres();
1260
1261        run_hrtimer_pending(cpu_base);
1262}
1263
1264/*
1265 * Called from hardirq context every jiffy
1266 */
1267static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
1268                                     int index)
1269{
1270        struct rb_node *node;
1271        struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
1272
1273        if (!base->first)
1274                return;
1275
1276        if (base->get_softirq_time)
1277                base->softirq_time = base->get_softirq_time();
1278
1279        spin_lock(&cpu_base->lock);
1280
1281        while ((node = base->first)) {
1282                struct hrtimer *timer;
1283
1284                timer = rb_entry(node, struct hrtimer, node);
1285                if (base->softirq_time.tv64 <= timer->expires.tv64)
1286                        break;
1287
1288                if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
1289                        __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
1290                        list_add_tail(&timer->cb_entry,
1291                                        &base->cpu_base->cb_pending);
1292                        continue;
1293                }
1294
1295                __run_hrtimer(timer);
1296        }
1297        spin_unlock(&cpu_base->lock);
1298}
1299
1300void hrtimer_run_queues(void)
1301{
1302        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1303        int i;
1304
1305        if (hrtimer_hres_active())
1306                return;
1307
1308        hrtimer_get_softirq_time(cpu_base);
1309
1310        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1311                run_hrtimer_queue(cpu_base, i);
1312}
1313
1314/*
1315 * Sleep related functions:
1316 */
1317static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1318{
1319        struct hrtimer_sleeper *t =
1320                container_of(timer, struct hrtimer_sleeper, timer);
1321        struct task_struct *task = t->task;
1322
1323        t->task = NULL;
1324        if (task)
1325                wake_up_process(task);
1326
1327        return HRTIMER_NORESTART;
1328}
1329
1330void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1331{
1332        sl->timer.function = hrtimer_wakeup;
1333        sl->task = task;
1334#ifdef CONFIG_HIGH_RES_TIMERS
1335        sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1336#endif
1337}
1338
1339static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1340{
1341        hrtimer_init_sleeper(t, current);
1342
1343        do {
1344                set_current_state(TASK_INTERRUPTIBLE);
1345                hrtimer_start(&t->timer, t->timer.expires, mode);
1346                if (!hrtimer_active(&t->timer))
1347                        t->task = NULL;
1348
1349                if (likely(t->task))
1350                        schedule();
1351
1352                hrtimer_cancel(&t->timer);
1353                mode = HRTIMER_MODE_ABS;
1354
1355        } while (t->task && !signal_pending(current));
1356
1357        __set_current_state(TASK_RUNNING);
1358
1359        return t->task == NULL;
1360}
1361
1362static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1363{
1364        struct timespec rmt;
1365        ktime_t rem;
1366
1367        rem = ktime_sub(timer->expires, timer->base->get_time());
1368        if (rem.tv64 <= 0)
1369                return 0;
1370        rmt = ktime_to_timespec(rem);
1371
1372        if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1373                return -EFAULT;
1374
1375        return 1;
1376}
1377
1378long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1379{
1380        struct hrtimer_sleeper t;
1381        struct timespec __user  *rmtp;
1382
1383        hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS);
1384        t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2;
1385
1386        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1387                return 0;
1388
1389        rmtp = (struct timespec __user *)restart->arg1;
1390        if (rmtp) {
1391                int ret = update_rmtp(&t.timer, rmtp);
1392                if (ret <= 0)
1393                        return ret;
1394        }
1395
1396        /* The other values in restart are already filled in */
1397        return -ERESTART_RESTARTBLOCK;
1398}
1399
1400long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1401                       const enum hrtimer_mode mode, const clockid_t clockid)
1402{
1403        struct restart_block *restart;
1404        struct hrtimer_sleeper t;
1405
1406        hrtimer_init(&t.timer, clockid, mode);
1407        t.timer.expires = timespec_to_ktime(*rqtp);
1408        if (do_nanosleep(&t, mode))
1409                return 0;
1410
1411        /* Absolute timers do not update the rmtp value and restart: */
1412        if (mode == HRTIMER_MODE_ABS)
1413                return -ERESTARTNOHAND;
1414
1415        if (rmtp) {
1416                int ret = update_rmtp(&t.timer, rmtp);
1417                if (ret <= 0)
1418                        return ret;
1419        }
1420
1421        restart = &current_thread_info()->restart_block;
1422        restart->fn = hrtimer_nanosleep_restart;
1423        restart->arg0 = (unsigned long) t.timer.base->index;
1424        restart->arg1 = (unsigned long) rmtp;
1425        restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF;
1426        restart->arg3 = t.timer.expires.tv64 >> 32;
1427
1428        return -ERESTART_RESTARTBLOCK;
1429}
1430
1431asmlinkage long
1432sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
1433{
1434        struct timespec tu;
1435
1436        if (copy_from_user(&tu, rqtp, sizeof(tu)))
1437                return -EFAULT;
1438
1439        if (!timespec_valid(&tu))
1440                return -EINVAL;
1441
1442        return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1443}
1444
1445/*
1446 * Functions related to boot-time initialization:
1447 */
1448static void __cpuinit init_hrtimers_cpu(int cpu)
1449{
1450        struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1451        int i;
1452
1453        spin_lock_init(&cpu_base->lock);
1454        lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key);
1455
1456        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
1457                cpu_base->clock_base[i].cpu_base = cpu_base;
1458
1459        INIT_LIST_HEAD(&cpu_base->cb_pending);
1460        hrtimer_init_hres(cpu_base);
1461}
1462
1463#ifdef CONFIG_HOTPLUG_CPU
1464
1465static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1466                                struct hrtimer_clock_base *new_base)
1467{
1468        struct hrtimer *timer;
1469        struct rb_node *node;
1470
1471        while ((node = rb_first(&old_base->active))) {
1472                timer = rb_entry(node, struct hrtimer, node);
1473                BUG_ON(hrtimer_callback_running(timer));
1474                __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
1475                timer->base = new_base;
1476                /*
1477                 * Enqueue the timer. Allow reprogramming of the event device
1478                 */
1479                enqueue_hrtimer(timer, new_base, 1);
1480        }
1481}
1482
1483static void migrate_hrtimers(int cpu)
1484{
1485        struct hrtimer_cpu_base *old_base, *new_base;
1486        int i;
1487
1488        BUG_ON(cpu_online(cpu));
1489        old_base = &per_cpu(hrtimer_bases, cpu);
1490        new_base = &get_cpu_var(hrtimer_bases);
1491
1492        tick_cancel_sched_timer(cpu);
1493
1494        local_irq_disable();
1495        double_spin_lock(&new_base->lock, &old_base->lock,
1496                         smp_processor_id() < cpu);
1497
1498        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1499                migrate_hrtimer_list(&old_base->clock_base[i],
1500                                     &new_base->clock_base[i]);
1501        }
1502
1503        double_spin_unlock(&new_base->lock, &old_base->lock,
1504                           smp_processor_id() < cpu);
1505        local_irq_enable();
1506        put_cpu_var(hrtimer_bases);
1507}
1508#endif /* CONFIG_HOTPLUG_CPU */
1509
1510static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1511                                        unsigned long action, void *hcpu)
1512{
1513        unsigned int cpu = (long)hcpu;
1514
1515        switch (action) {
1516
1517        case CPU_UP_PREPARE:
1518        case CPU_UP_PREPARE_FROZEN:
1519                init_hrtimers_cpu(cpu);
1520                break;
1521
1522#ifdef CONFIG_HOTPLUG_CPU
1523        case CPU_DEAD:
1524        case CPU_DEAD_FROZEN:
1525                clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
1526                migrate_hrtimers(cpu);
1527                break;
1528#endif
1529
1530        default:
1531                break;
1532        }
1533
1534        return NOTIFY_OK;
1535}
1536
1537static struct notifier_block __cpuinitdata hrtimers_nb = {
1538        .notifier_call = hrtimer_cpu_notify,
1539};
1540
1541void __init hrtimers_init(void)
1542{
1543        hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1544                          (void *)(long)smp_processor_id());
1545        register_cpu_notifier(&hrtimers_nb);
1546#ifdef CONFIG_HIGH_RES_TIMERS
1547        open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
1548#endif
1549}
1550
1551
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.