linux/kernel/timer.c
   1/*
   2 *  linux/kernel/timer.c
   3 *
   4 *  Kernel internal timers, basic process system calls
   5 *
   6 *  Copyright (C) 1991, 1992  Linus Torvalds
   7 *
   8 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
   9 *
  10 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
  11 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
   12 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
  13 *              serialize accesses to xtime/lost_ticks).
  14 *                              Copyright (C) 1998  Andrea Arcangeli
  15 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
  16 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
  17 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
  18 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
  19 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
  20 */
  21
  22#include <linux/kernel_stat.h>
  23#include <linux/export.h>
  24#include <linux/interrupt.h>
  25#include <linux/percpu.h>
  26#include <linux/init.h>
  27#include <linux/mm.h>
  28#include <linux/swap.h>
  29#include <linux/pid_namespace.h>
  30#include <linux/notifier.h>
  31#include <linux/thread_info.h>
  32#include <linux/time.h>
  33#include <linux/jiffies.h>
  34#include <linux/posix-timers.h>
  35#include <linux/cpu.h>
  36#include <linux/syscalls.h>
  37#include <linux/delay.h>
  38#include <linux/tick.h>
  39#include <linux/kallsyms.h>
  40#include <linux/irq_work.h>
  41#include <linux/sched.h>
  42#include <linux/sched/sysctl.h>
  43#include <linux/slab.h>
  44
  45#include <asm/uaccess.h>
  46#include <asm/unistd.h>
  47#include <asm/div64.h>
  48#include <asm/timex.h>
  49#include <asm/io.h>
  50
  51#define CREATE_TRACE_POINTS
  52#include <trace/events/timer.h>
  53
  54u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
  55
  56EXPORT_SYMBOL(jiffies_64);
  57
  58/*
  59 * per-CPU timer vector definitions:
  60 */
  61#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
  62#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
  63#define TVN_SIZE (1 << TVN_BITS)
  64#define TVR_SIZE (1 << TVR_BITS)
  65#define TVN_MASK (TVN_SIZE - 1)
  66#define TVR_MASK (TVR_SIZE - 1)
  67#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
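/*
 * Editor's note (illustrative, not part of the original file): with the
 * default !CONFIG_BASE_SMALL values (TVR_BITS = 8, TVN_BITS = 6), a timer
 * is placed into a level according to idx = expires - base->timer_jiffies:
 *
 *   tv1: 256 buckets of 1 jiffy each        idx <  2^8
 *   tv2:  64 buckets of 2^8  jiffies each   idx <  2^14
 *   tv3:  64 buckets of 2^14 jiffies each   idx <  2^20
 *   tv4:  64 buckets of 2^20 jiffies each   idx <  2^26
 *   tv5:  64 buckets of 2^26 jiffies each   idx <= MAX_TVAL = 2^32 - 1
 *
 * Timers in tv2..tv5 are re-hashed into a finer level ("cascaded") as
 * base->timer_jiffies advances; see __internal_add_timer() and cascade().
 */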
  68
  69struct tvec {
  70        struct list_head vec[TVN_SIZE];
  71};
  72
  73struct tvec_root {
  74        struct list_head vec[TVR_SIZE];
  75};
  76
  77struct tvec_base {
  78        spinlock_t lock;
  79        struct timer_list *running_timer;
  80        unsigned long timer_jiffies;
  81        unsigned long next_timer;
  82        unsigned long active_timers;
  83        struct tvec_root tv1;
  84        struct tvec tv2;
  85        struct tvec tv3;
  86        struct tvec tv4;
  87        struct tvec tv5;
  88} ____cacheline_aligned;
  89
  90struct tvec_base boot_tvec_bases;
  91EXPORT_SYMBOL(boot_tvec_bases);
  92static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
  93
  94/* Functions below help us manage 'deferrable' flag */
  95static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
  96{
  97        return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
  98}
  99
 100static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
 101{
 102        return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
 103}
 104
 105static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 106{
 107        return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
 108}
 109
 110static inline void
 111timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
 112{
 113        unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
 114
 115        timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
 116}
 117
 118static unsigned long round_jiffies_common(unsigned long j, int cpu,
 119                bool force_up)
 120{
 121        int rem;
 122        unsigned long original = j;
 123
 124        /*
 125         * We don't want all cpus firing their timers at once hitting the
 126         * same lock or cachelines, so we skew each extra cpu with an extra
  127         * 3 jiffies. This 3-jiffy value came originally from the mm/ code,
  128         * which already did this.
  129         * The skew is done by adding 3*cpunr, then rounding, then subtracting
  130         * this extra offset again.
 131         */
 132        j += cpu * 3;
 133
 134        rem = j % HZ;
 135
 136        /*
  137         * If the target jiffy is just after a whole second (which can happen
  138         * due to delays of the timer irq, long irq-off times, etc.), then
  139         * we should round down to the whole second, not up. Use 1/4 second
  140         * as the cutoff for this rounding, as an extreme upper bound.
 141         * But never round down if @force_up is set.
 142         */
 143        if (rem < HZ/4 && !force_up) /* round down */
 144                j = j - rem;
 145        else /* round up */
 146                j = j - rem + HZ;
 147
 148        /* now that we have rounded, subtract the extra skew again */
 149        j -= cpu * 3;
 150
 151        if (j <= jiffies) /* rounding ate our timeout entirely; */
 152                return original;
 153        return j;
 154}
 155
 156/**
 157 * __round_jiffies - function to round jiffies to a full second
 158 * @j: the time in (absolute) jiffies that should be rounded
 159 * @cpu: the processor number on which the timeout will happen
 160 *
 161 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 162 * up or down to (approximately) full seconds. This is useful for timers
 163 * for which the exact time they fire does not matter too much, as long as
 164 * they fire approximately every X seconds.
 165 *
 166 * By rounding these timers to whole seconds, all such timers will fire
 167 * at the same time, rather than at various times spread out. The goal
 168 * of this is to have the CPU wake up less, which saves power.
 169 *
 170 * The exact rounding is skewed for each processor to avoid all
 171 * processors firing at the exact same time, which could lead
 172 * to lock contention or spurious cache line bouncing.
 173 *
 174 * The return value is the rounded version of the @j parameter.
 175 */
 176unsigned long __round_jiffies(unsigned long j, int cpu)
 177{
 178        return round_jiffies_common(j, cpu, false);
 179}
 180EXPORT_SYMBOL_GPL(__round_jiffies);
 181
 182/**
 183 * __round_jiffies_relative - function to round jiffies to a full second
 184 * @j: the time in (relative) jiffies that should be rounded
 185 * @cpu: the processor number on which the timeout will happen
 186 *
  187 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 188 * up or down to (approximately) full seconds. This is useful for timers
 189 * for which the exact time they fire does not matter too much, as long as
 190 * they fire approximately every X seconds.
 191 *
 192 * By rounding these timers to whole seconds, all such timers will fire
 193 * at the same time, rather than at various times spread out. The goal
 194 * of this is to have the CPU wake up less, which saves power.
 195 *
 196 * The exact rounding is skewed for each processor to avoid all
 197 * processors firing at the exact same time, which could lead
 198 * to lock contention or spurious cache line bouncing.
 199 *
 200 * The return value is the rounded version of the @j parameter.
 201 */
 202unsigned long __round_jiffies_relative(unsigned long j, int cpu)
 203{
 204        unsigned long j0 = jiffies;
 205
 206        /* Use j0 because jiffies might change while we run */
 207        return round_jiffies_common(j + j0, cpu, false) - j0;
 208}
 209EXPORT_SYMBOL_GPL(__round_jiffies_relative);
 210
 211/**
 212 * round_jiffies - function to round jiffies to a full second
 213 * @j: the time in (absolute) jiffies that should be rounded
 214 *
 215 * round_jiffies() rounds an absolute time in the future (in jiffies)
 216 * up or down to (approximately) full seconds. This is useful for timers
 217 * for which the exact time they fire does not matter too much, as long as
 218 * they fire approximately every X seconds.
 219 *
 220 * By rounding these timers to whole seconds, all such timers will fire
 221 * at the same time, rather than at various times spread out. The goal
 222 * of this is to have the CPU wake up less, which saves power.
 223 *
 224 * The return value is the rounded version of the @j parameter.
 225 */
 226unsigned long round_jiffies(unsigned long j)
 227{
 228        return round_jiffies_common(j, raw_smp_processor_id(), false);
 229}
 230EXPORT_SYMBOL_GPL(round_jiffies);
 231
 232/**
 233 * round_jiffies_relative - function to round jiffies to a full second
 234 * @j: the time in (relative) jiffies that should be rounded
 235 *
  236 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 237 * up or down to (approximately) full seconds. This is useful for timers
 238 * for which the exact time they fire does not matter too much, as long as
 239 * they fire approximately every X seconds.
 240 *
 241 * By rounding these timers to whole seconds, all such timers will fire
 242 * at the same time, rather than at various times spread out. The goal
 243 * of this is to have the CPU wake up less, which saves power.
 244 *
 245 * The return value is the rounded version of the @j parameter.
 246 */
 247unsigned long round_jiffies_relative(unsigned long j)
 248{
 249        return __round_jiffies_relative(j, raw_smp_processor_id());
 250}
 251EXPORT_SYMBOL_GPL(round_jiffies_relative);
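/*
 * Editor's example (illustrative sketch, not part of the original file):
 * a coarse periodic timer can align its expiry to a whole second so it is
 * batched with other second-aligned wakeups.  "my_timer" is hypothetical
 * and assumed to have been initialized with setup_timer() elsewhere.
 */
static struct timer_list my_timer;

static void my_rearm_coarse_timer(void)
{
        /* Fire roughly every 5 seconds, rounded to a whole second. */
        mod_timer(&my_timer, round_jiffies(jiffies + 5 * HZ));
}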
 252
 253/**
 254 * __round_jiffies_up - function to round jiffies up to a full second
 255 * @j: the time in (absolute) jiffies that should be rounded
 256 * @cpu: the processor number on which the timeout will happen
 257 *
 258 * This is the same as __round_jiffies() except that it will never
 259 * round down.  This is useful for timeouts for which the exact time
 260 * of firing does not matter too much, as long as they don't fire too
 261 * early.
 262 */
 263unsigned long __round_jiffies_up(unsigned long j, int cpu)
 264{
 265        return round_jiffies_common(j, cpu, true);
 266}
 267EXPORT_SYMBOL_GPL(__round_jiffies_up);
 268
 269/**
 270 * __round_jiffies_up_relative - function to round jiffies up to a full second
 271 * @j: the time in (relative) jiffies that should be rounded
 272 * @cpu: the processor number on which the timeout will happen
 273 *
 274 * This is the same as __round_jiffies_relative() except that it will never
 275 * round down.  This is useful for timeouts for which the exact time
 276 * of firing does not matter too much, as long as they don't fire too
 277 * early.
 278 */
 279unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
 280{
 281        unsigned long j0 = jiffies;
 282
 283        /* Use j0 because jiffies might change while we run */
 284        return round_jiffies_common(j + j0, cpu, true) - j0;
 285}
 286EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
 287
 288/**
 289 * round_jiffies_up - function to round jiffies up to a full second
 290 * @j: the time in (absolute) jiffies that should be rounded
 291 *
 292 * This is the same as round_jiffies() except that it will never
 293 * round down.  This is useful for timeouts for which the exact time
 294 * of firing does not matter too much, as long as they don't fire too
 295 * early.
 296 */
 297unsigned long round_jiffies_up(unsigned long j)
 298{
 299        return round_jiffies_common(j, raw_smp_processor_id(), true);
 300}
 301EXPORT_SYMBOL_GPL(round_jiffies_up);
 302
 303/**
 304 * round_jiffies_up_relative - function to round jiffies up to a full second
 305 * @j: the time in (relative) jiffies that should be rounded
 306 *
 307 * This is the same as round_jiffies_relative() except that it will never
 308 * round down.  This is useful for timeouts for which the exact time
 309 * of firing does not matter too much, as long as they don't fire too
 310 * early.
 311 */
 312unsigned long round_jiffies_up_relative(unsigned long j)
 313{
 314        return __round_jiffies_up_relative(j, raw_smp_processor_id());
 315}
 316EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 317
 318/**
 319 * set_timer_slack - set the allowed slack for a timer
 320 * @timer: the timer to be modified
 321 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 322 *
  323 * Set the amount of slack, in jiffies, that a certain timer is
  324 * allowed. By setting this value, the timer subsystem
  325 * will schedule the actual timer to fire somewhere between
  326 * the time mod_timer() asks for, and that time plus the slack.
 327 *
 328 * By setting the slack to -1, a percentage of the delay is used
 329 * instead.
 330 */
 331void set_timer_slack(struct timer_list *timer, int slack_hz)
 332{
 333        timer->slack = slack_hz;
 334}
 335EXPORT_SYMBOL_GPL(set_timer_slack);
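/*
 * Editor's example (illustrative sketch, not part of the original file):
 * a timeout that tolerates imprecision can advertise that, so its expiry
 * may be coalesced with others.  "cleanup_timer" is hypothetical and
 * assumed to be initialized already.
 */
static void my_arm_lazy_cleanup(struct timer_list *cleanup_timer)
{
        /* Allow the expiry to slip by up to HZ/4 jiffies (~250 ms). */
        set_timer_slack(cleanup_timer, HZ / 4);
        mod_timer(cleanup_timer, jiffies + 10 * HZ);
}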
 336
 337static void
 338__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 339{
 340        unsigned long expires = timer->expires;
 341        unsigned long idx = expires - base->timer_jiffies;
 342        struct list_head *vec;
 343
 344        if (idx < TVR_SIZE) {
 345                int i = expires & TVR_MASK;
 346                vec = base->tv1.vec + i;
 347        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
 348                int i = (expires >> TVR_BITS) & TVN_MASK;
 349                vec = base->tv2.vec + i;
 350        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
 351                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
 352                vec = base->tv3.vec + i;
 353        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
 354                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
 355                vec = base->tv4.vec + i;
 356        } else if ((signed long) idx < 0) {
 357                /*
 358                 * Can happen if you add a timer with expires == jiffies,
 359                 * or you set a timer to go off in the past
 360                 */
 361                vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
 362        } else {
 363                int i;
 364                /* If the timeout is larger than MAX_TVAL (on 64-bit
 365                 * architectures or with CONFIG_BASE_SMALL=1) then we
 366                 * use the maximum timeout.
 367                 */
 368                if (idx > MAX_TVAL) {
 369                        idx = MAX_TVAL;
 370                        expires = idx + base->timer_jiffies;
 371                }
 372                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
 373                vec = base->tv5.vec + i;
 374        }
 375        /*
 376         * Timers are FIFO:
 377         */
 378        list_add_tail(&timer->entry, vec);
 379}
 380
 381static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 382{
 383        __internal_add_timer(base, timer);
 384        /*
 385         * Update base->active_timers and base->next_timer
 386         */
 387        if (!tbase_get_deferrable(timer->base)) {
 388                if (time_before(timer->expires, base->next_timer))
 389                        base->next_timer = timer->expires;
 390                base->active_timers++;
 391        }
 392}
 393
 394#ifdef CONFIG_TIMER_STATS
 395void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 396{
 397        if (timer->start_site)
 398                return;
 399
 400        timer->start_site = addr;
 401        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 402        timer->start_pid = current->pid;
 403}
 404
 405static void timer_stats_account_timer(struct timer_list *timer)
 406{
 407        unsigned int flag = 0;
 408
 409        if (likely(!timer->start_site))
 410                return;
 411        if (unlikely(tbase_get_deferrable(timer->base)))
 412                flag |= TIMER_STATS_FLAG_DEFERRABLE;
 413
 414        timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
 415                                 timer->function, timer->start_comm, flag);
 416}
 417
 418#else
 419static void timer_stats_account_timer(struct timer_list *timer) {}
 420#endif
 421
 422#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 423
 424static struct debug_obj_descr timer_debug_descr;
 425
 426static void *timer_debug_hint(void *addr)
 427{
 428        return ((struct timer_list *) addr)->function;
 429}
 430
 431/*
 432 * fixup_init is called when:
 433 * - an active object is initialized
 434 */
 435static int timer_fixup_init(void *addr, enum debug_obj_state state)
 436{
 437        struct timer_list *timer = addr;
 438
 439        switch (state) {
 440        case ODEBUG_STATE_ACTIVE:
 441                del_timer_sync(timer);
 442                debug_object_init(timer, &timer_debug_descr);
 443                return 1;
 444        default:
 445                return 0;
 446        }
 447}
 448
 449/* Stub timer callback for improperly used timers. */
 450static void stub_timer(unsigned long data)
 451{
 452        WARN_ON(1);
 453}
 454
 455/*
 456 * fixup_activate is called when:
 457 * - an active object is activated
 458 * - an unknown object is activated (might be a statically initialized object)
 459 */
 460static int timer_fixup_activate(void *addr, enum debug_obj_state state)
 461{
 462        struct timer_list *timer = addr;
 463
 464        switch (state) {
 465
 466        case ODEBUG_STATE_NOTAVAILABLE:
 467                /*
 468                 * This is not really a fixup. The timer was
 469                 * statically initialized. We just make sure that it
 470                 * is tracked in the object tracker.
 471                 */
 472                if (timer->entry.next == NULL &&
 473                    timer->entry.prev == TIMER_ENTRY_STATIC) {
 474                        debug_object_init(timer, &timer_debug_descr);
 475                        debug_object_activate(timer, &timer_debug_descr);
 476                        return 0;
 477                } else {
 478                        setup_timer(timer, stub_timer, 0);
 479                        return 1;
 480                }
 481                return 0;
 482
 483        case ODEBUG_STATE_ACTIVE:
 484                WARN_ON(1);
 485
 486        default:
 487                return 0;
 488        }
 489}
 490
 491/*
 492 * fixup_free is called when:
 493 * - an active object is freed
 494 */
 495static int timer_fixup_free(void *addr, enum debug_obj_state state)
 496{
 497        struct timer_list *timer = addr;
 498
 499        switch (state) {
 500        case ODEBUG_STATE_ACTIVE:
 501                del_timer_sync(timer);
 502                debug_object_free(timer, &timer_debug_descr);
 503                return 1;
 504        default:
 505                return 0;
 506        }
 507}
 508
 509/*
 510 * fixup_assert_init is called when:
 511 * - an untracked/uninit-ed object is found
 512 */
 513static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 514{
 515        struct timer_list *timer = addr;
 516
 517        switch (state) {
 518        case ODEBUG_STATE_NOTAVAILABLE:
 519                if (timer->entry.prev == TIMER_ENTRY_STATIC) {
 520                        /*
 521                         * This is not really a fixup. The timer was
 522                         * statically initialized. We just make sure that it
 523                         * is tracked in the object tracker.
 524                         */
 525                        debug_object_init(timer, &timer_debug_descr);
 526                        return 0;
 527                } else {
 528                        setup_timer(timer, stub_timer, 0);
 529                        return 1;
 530                }
 531        default:
 532                return 0;
 533        }
 534}
 535
 536static struct debug_obj_descr timer_debug_descr = {
 537        .name                   = "timer_list",
 538        .debug_hint             = timer_debug_hint,
 539        .fixup_init             = timer_fixup_init,
 540        .fixup_activate         = timer_fixup_activate,
 541        .fixup_free             = timer_fixup_free,
 542        .fixup_assert_init      = timer_fixup_assert_init,
 543};
 544
 545static inline void debug_timer_init(struct timer_list *timer)
 546{
 547        debug_object_init(timer, &timer_debug_descr);
 548}
 549
 550static inline void debug_timer_activate(struct timer_list *timer)
 551{
 552        debug_object_activate(timer, &timer_debug_descr);
 553}
 554
 555static inline void debug_timer_deactivate(struct timer_list *timer)
 556{
 557        debug_object_deactivate(timer, &timer_debug_descr);
 558}
 559
 560static inline void debug_timer_free(struct timer_list *timer)
 561{
 562        debug_object_free(timer, &timer_debug_descr);
 563}
 564
 565static inline void debug_timer_assert_init(struct timer_list *timer)
 566{
 567        debug_object_assert_init(timer, &timer_debug_descr);
 568}
 569
 570static void do_init_timer(struct timer_list *timer, unsigned int flags,
 571                          const char *name, struct lock_class_key *key);
 572
 573void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
 574                             const char *name, struct lock_class_key *key)
 575{
 576        debug_object_init_on_stack(timer, &timer_debug_descr);
 577        do_init_timer(timer, flags, name, key);
 578}
 579EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
 580
 581void destroy_timer_on_stack(struct timer_list *timer)
 582{
 583        debug_object_free(timer, &timer_debug_descr);
 584}
 585EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
 586
 587#else
 588static inline void debug_timer_init(struct timer_list *timer) { }
 589static inline void debug_timer_activate(struct timer_list *timer) { }
 590static inline void debug_timer_deactivate(struct timer_list *timer) { }
 591static inline void debug_timer_assert_init(struct timer_list *timer) { }
 592#endif
 593
 594static inline void debug_init(struct timer_list *timer)
 595{
 596        debug_timer_init(timer);
 597        trace_timer_init(timer);
 598}
 599
 600static inline void
 601debug_activate(struct timer_list *timer, unsigned long expires)
 602{
 603        debug_timer_activate(timer);
 604        trace_timer_start(timer, expires);
 605}
 606
 607static inline void debug_deactivate(struct timer_list *timer)
 608{
 609        debug_timer_deactivate(timer);
 610        trace_timer_cancel(timer);
 611}
 612
 613static inline void debug_assert_init(struct timer_list *timer)
 614{
 615        debug_timer_assert_init(timer);
 616}
 617
 618static void do_init_timer(struct timer_list *timer, unsigned int flags,
 619                          const char *name, struct lock_class_key *key)
 620{
 621        struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
 622
 623        timer->entry.next = NULL;
 624        timer->base = (void *)((unsigned long)base | flags);
 625        timer->slack = -1;
 626#ifdef CONFIG_TIMER_STATS
 627        timer->start_site = NULL;
 628        timer->start_pid = -1;
 629        memset(timer->start_comm, 0, TASK_COMM_LEN);
 630#endif
 631        lockdep_init_map(&timer->lockdep_map, name, key, 0);
 632}
 633
 634/**
 635 * init_timer_key - initialize a timer
 636 * @timer: the timer to be initialized
 637 * @flags: timer flags
 638 * @name: name of the timer
 639 * @key: lockdep class key of the fake lock used for tracking timer
 640 *       sync lock dependencies
 641 *
  642 * init_timer_key() must be done to a timer prior to calling *any* of the
 643 * other timer functions.
 644 */
 645void init_timer_key(struct timer_list *timer, unsigned int flags,
 646                    const char *name, struct lock_class_key *key)
 647{
 648        debug_init(timer);
 649        do_init_timer(timer, flags, name, key);
 650}
 651EXPORT_SYMBOL(init_timer_key);
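/*
 * Editor's example (illustrative sketch, not part of the original file):
 * init_timer_key() is normally reached through the init_timer() or
 * setup_timer() wrappers.  "struct my_device" and its field names are
 * hypothetical.
 */
struct my_device {
        struct timer_list watchdog_timer;
        /* ... */
};

static void my_watchdog_fn(unsigned long data)
{
        /* "data" carries the struct my_device pointer passed to setup_timer(). */
        struct my_device *dev = (struct my_device *)data;

        /* ... handle the watchdog expiry for dev ... */
}

static void my_device_setup_watchdog(struct my_device *dev)
{
        /* Initialize only; nothing is queued until mod_timer()/add_timer(). */
        setup_timer(&dev->watchdog_timer, my_watchdog_fn, (unsigned long)dev);
}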
 652
 653static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 654{
 655        struct list_head *entry = &timer->entry;
 656
 657        debug_deactivate(timer);
 658
 659        __list_del(entry->prev, entry->next);
 660        if (clear_pending)
 661                entry->next = NULL;
 662        entry->prev = LIST_POISON2;
 663}
 664
 665static inline void
 666detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
 667{
 668        detach_timer(timer, true);
 669        if (!tbase_get_deferrable(timer->base))
 670                base->active_timers--;
 671}
 672
 673static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
 674                             bool clear_pending)
 675{
 676        if (!timer_pending(timer))
 677                return 0;
 678
 679        detach_timer(timer, clear_pending);
 680        if (!tbase_get_deferrable(timer->base)) {
 681                base->active_timers--;
 682                if (timer->expires == base->next_timer)
 683                        base->next_timer = base->timer_jiffies;
 684        }
 685        return 1;
 686}
 687
 688/*
 689 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 690 * means that all timers which are tied to this base via timer->base are
 691 * locked, and the base itself is locked too.
 692 *
 693 * So __run_timers/migrate_timers can safely modify all timers which could
 694 * be found on ->tvX lists.
 695 *
 696 * When the timer's base is locked, and the timer removed from list, it is
 697 * possible to set timer->base = NULL and drop the lock: the timer remains
 698 * locked.
 699 */
 700static struct tvec_base *lock_timer_base(struct timer_list *timer,
 701                                        unsigned long *flags)
 702        __acquires(timer->base->lock)
 703{
 704        struct tvec_base *base;
 705
 706        for (;;) {
 707                struct tvec_base *prelock_base = timer->base;
 708                base = tbase_get_base(prelock_base);
 709                if (likely(base != NULL)) {
 710                        spin_lock_irqsave(&base->lock, *flags);
 711                        if (likely(prelock_base == timer->base))
 712                                return base;
 713                        /* The timer has migrated to another CPU */
 714                        spin_unlock_irqrestore(&base->lock, *flags);
 715                }
 716                cpu_relax();
 717        }
 718}
 719
 720static inline int
 721__mod_timer(struct timer_list *timer, unsigned long expires,
 722                                                bool pending_only, int pinned)
 723{
 724        struct tvec_base *base, *new_base;
 725        unsigned long flags;
  726        int ret = 0, cpu;
 727
 728        timer_stats_timer_set_start_info(timer);
 729        BUG_ON(!timer->function);
 730
 731        base = lock_timer_base(timer, &flags);
 732
 733        ret = detach_if_pending(timer, base, false);
 734        if (!ret && pending_only)
 735                goto out_unlock;
 736
 737        debug_activate(timer, expires);
 738
 739        cpu = smp_processor_id();
 740
 741#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
 742        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
 743                cpu = get_nohz_timer_target();
 744#endif
 745        new_base = per_cpu(tvec_bases, cpu);
 746
 747        if (base != new_base) {
 748                /*
 749                 * We are trying to schedule the timer on the local CPU.
  750                 * However we can't change the timer's base while it is running,
  751                 * otherwise del_timer_sync() can't detect that the timer's
  752                 * handler has not yet finished. This also guarantees that
 753                 * the timer is serialized wrt itself.
 754                 */
 755                if (likely(base->running_timer != timer)) {
 756                        /* See the comment in lock_timer_base() */
 757                        timer_set_base(timer, NULL);
 758                        spin_unlock(&base->lock);
 759                        base = new_base;
 760                        spin_lock(&base->lock);
 761                        timer_set_base(timer, base);
 762                }
 763        }
 764
 765        timer->expires = expires;
 766        internal_add_timer(base, timer);
 767
 768out_unlock:
 769        spin_unlock_irqrestore(&base->lock, flags);
 770
 771        return ret;
 772}
 773
 774/**
 775 * mod_timer_pending - modify a pending timer's timeout
 776 * @timer: the pending timer to be modified
 777 * @expires: new timeout in jiffies
 778 *
  779 * mod_timer_pending() behaves like mod_timer() for pending timers,
 780 * but will not re-activate and modify already deleted timers.
 781 *
 782 * It is useful for unserialized use of timers.
 783 */
 784int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 785{
 786        return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 787}
 788EXPORT_SYMBOL(mod_timer_pending);
 789
 790/*
 791 * Decide where to put the timer while taking the slack into account
 792 *
 793 * Algorithm:
 794 *   1) calculate the maximum (absolute) time
 795 *   2) calculate the highest bit where the expires and new max are different
 796 *   3) use this bit to make a mask
 797 *   4) use the bitmask to round down the maximum time, so that all last
 798 *      bits are zeros
 799 */
 800static inline
 801unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 802{
 803        unsigned long expires_limit, mask;
 804        int bit;
 805
 806        if (timer->slack >= 0) {
 807                expires_limit = expires + timer->slack;
 808        } else {
 809                long delta = expires - jiffies;
 810
 811                if (delta < 256)
 812                        return expires;
 813
 814                expires_limit = expires + delta / 256;
 815        }
 816        mask = expires ^ expires_limit;
 817        if (mask == 0)
 818                return expires;
 819
 820        bit = find_last_bit(&mask, BITS_PER_LONG);
 821
 822        mask = (1 << bit) - 1;
 823
 824        expires_limit = expires_limit & ~(mask);
 825
 826        return expires_limit;
 827}
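/*
 * Editor's note: a worked example of apply_slack() above (illustrative,
 * not part of the original file).  Assume expires = 0x1007 and
 * timer->slack = 16 (0x10):
 *
 *   expires_limit = 0x1007 + 0x10    = 0x1017
 *   mask          = 0x1007 ^ 0x1017  = 0x0010   (highest differing bit: 4)
 *   mask          = (1 << 4) - 1     = 0x000f
 *   expires_limit = 0x1017 & ~0x000f = 0x1010
 *
 * The timer is queued for 0x1010, which lies inside the allowed window
 * [0x1007, 0x1017], so it can share a wheel bucket with nearby timers.
 */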
 828
 829/**
 830 * mod_timer - modify a timer's timeout
 831 * @timer: the timer to be modified
 832 * @expires: new timeout in jiffies
 833 *
 834 * mod_timer() is a more efficient way to update the expire field of an
 835 * active timer (if the timer is inactive it will be activated)
 836 *
 837 * mod_timer(timer, expires) is equivalent to:
 838 *
 839 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 840 *
 841 * Note that if there are multiple unserialized concurrent users of the
 842 * same timer, then mod_timer() is the only safe way to modify the timeout,
 843 * since add_timer() cannot modify an already running timer.
 844 *
 845 * The function returns whether it has modified a pending timer or not.
 846 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 847 * active timer returns 1.)
 848 */
 849int mod_timer(struct timer_list *timer, unsigned long expires)
 850{
 851        expires = apply_slack(timer, expires);
 852
 853        /*
 854         * This is a common optimization triggered by the
 855         * networking code - if the timer is re-modified
 856         * to be the same thing then just return:
 857         */
 858        if (timer_pending(timer) && timer->expires == expires)
 859                return 1;
 860
 861        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 862}
 863EXPORT_SYMBOL(mod_timer);
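/*
 * Editor's example (illustrative sketch, not part of the original file):
 * a self-rearming periodic timer built on mod_timer().  The "my_poll"
 * names and the one-second period are hypothetical.
 */
static struct timer_list my_poll_timer;

static void my_poll_fn(unsigned long data)
{
        /* ... do the periodic work ... */

        /* Re-arm for roughly one second from now. */
        mod_timer(&my_poll_timer, jiffies + HZ);
}

static void my_poll_start(void)
{
        setup_timer(&my_poll_timer, my_poll_fn, 0);
        mod_timer(&my_poll_timer, jiffies + HZ);
}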
 864
 865/**
 866 * mod_timer_pinned - modify a timer's timeout
 867 * @timer: the timer to be modified
 868 * @expires: new timeout in jiffies
 869 *
 870 * mod_timer_pinned() is a way to update the expire field of an
 871 * active timer (if the timer is inactive it will be activated)
 872 * and to ensure that the timer is scheduled on the current CPU.
 873 *
 874 * Note that this does not prevent the timer from being migrated
 875 * when the current CPU goes offline.  If this is a problem for
 876 * you, use CPU-hotplug notifiers to handle it correctly, for
 877 * example, cancelling the timer when the corresponding CPU goes
 878 * offline.
 879 *
 880 * mod_timer_pinned(timer, expires) is equivalent to:
 881 *
 882 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 883 */
 884int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
 885{
 886        if (timer->expires == expires && timer_pending(timer))
 887                return 1;
 888
 889        return __mod_timer(timer, expires, false, TIMER_PINNED);
 890}
 891EXPORT_SYMBOL(mod_timer_pinned);
 892
 893/**
 894 * add_timer - start a timer
 895 * @timer: the timer to be added
 896 *
 897 * The kernel will do a ->function(->data) callback from the
 898 * timer interrupt at the ->expires point in the future. The
 899 * current time is 'jiffies'.
 900 *
 901 * The timer's ->expires, ->function (and if the handler uses it, ->data)
  902 * fields must be set prior to calling this function.
 903 *
 904 * Timers with an ->expires field in the past will be executed in the next
 905 * timer tick.
 906 */
 907void add_timer(struct timer_list *timer)
 908{
 909        BUG_ON(timer_pending(timer));
 910        mod_timer(timer, timer->expires);
 911}
 912EXPORT_SYMBOL(add_timer);
 913
 914/**
 915 * add_timer_on - start a timer on a particular CPU
 916 * @timer: the timer to be added
 917 * @cpu: the CPU to start it on
 918 *
 919 * This is not very scalable on SMP. Double adds are not possible.
 920 */
 921void add_timer_on(struct timer_list *timer, int cpu)
 922{
 923        struct tvec_base *base = per_cpu(tvec_bases, cpu);
 924        unsigned long flags;
 925
 926        timer_stats_timer_set_start_info(timer);
 927        BUG_ON(timer_pending(timer) || !timer->function);
 928        spin_lock_irqsave(&base->lock, flags);
 929        timer_set_base(timer, base);
 930        debug_activate(timer, timer->expires);
 931        internal_add_timer(base, timer);
 932        /*
 933         * Check whether the other CPU is idle and needs to be
 934         * triggered to reevaluate the timer wheel when nohz is
 935         * active. We are protected against the other CPU fiddling
 936         * with the timer by holding the timer base lock. This also
 937         * makes sure that a CPU on the way to idle can not evaluate
 938         * the timer wheel.
 939         */
 940        wake_up_idle_cpu(cpu);
 941        spin_unlock_irqrestore(&base->lock, flags);
 942}
 943EXPORT_SYMBOL_GPL(add_timer_on);
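/*
 * Editor's example (illustrative sketch, not part of the original file):
 * queueing a timer on a specific CPU, e.g. from per-CPU setup code.  The
 * timer is hypothetical and must already have been initialized (with its
 * ->function set) via setup_timer().
 */
static void my_start_timer_on(struct timer_list *percpu_timer, int cpu)
{
        percpu_timer->expires = jiffies + HZ;
        add_timer_on(percpu_timer, cpu);
}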
 944
 945/**
  946 * del_timer - deactivate a timer.
 947 * @timer: the timer to be deactivated
 948 *
 949 * del_timer() deactivates a timer - this works on both active and inactive
 950 * timers.
 951 *
 952 * The function returns whether it has deactivated a pending timer or not.
 953 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 954 * active timer returns 1.)
 955 */
 956int del_timer(struct timer_list *timer)
 957{
 958        struct tvec_base *base;
 959        unsigned long flags;
 960        int ret = 0;
 961
 962        debug_assert_init(timer);
 963
 964        timer_stats_timer_clear_start_info(timer);
 965        if (timer_pending(timer)) {
 966                base = lock_timer_base(timer, &flags);
 967                ret = detach_if_pending(timer, base, true);
 968                spin_unlock_irqrestore(&base->lock, flags);
 969        }
 970
 971        return ret;
 972}
 973EXPORT_SYMBOL(del_timer);
 974
 975/**
 976 * try_to_del_timer_sync - Try to deactivate a timer
  977 * @timer: timer to delete
 978 *
 979 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 980 * exit the timer is not queued and the handler is not running on any CPU.
 981 */
 982int try_to_del_timer_sync(struct timer_list *timer)
 983{
 984        struct tvec_base *base;
 985        unsigned long flags;
 986        int ret = -1;
 987
 988        debug_assert_init(timer);
 989
 990        base = lock_timer_base(timer, &flags);
 991
 992        if (base->running_timer != timer) {
 993                timer_stats_timer_clear_start_info(timer);
 994                ret = detach_if_pending(timer, base, true);
 995        }
 996        spin_unlock_irqrestore(&base->lock, flags);
 997
 998        return ret;
 999}
1000EXPORT_SYMBOL(try_to_del_timer_sync);
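/*
 * Editor's example (illustrative sketch, not part of the original file):
 * cancelling a timer while holding a lock that the timer callback also
 * takes.  del_timer_sync() could deadlock here, so retry
 * try_to_del_timer_sync() and drop the lock between attempts.  The names
 * are hypothetical.
 */
static DEFINE_SPINLOCK(my_lock);
static struct timer_list my_retry_timer;

static void my_cancel_under_lock(void)
{
        spin_lock_bh(&my_lock);
        while (try_to_del_timer_sync(&my_retry_timer) < 0) {
                /* The handler is running and may want my_lock: back off. */
                spin_unlock_bh(&my_lock);
                cpu_relax();
                spin_lock_bh(&my_lock);
        }
        /* Here the timer is neither queued nor running its handler. */
        spin_unlock_bh(&my_lock);
}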
1001
1002#ifdef CONFIG_SMP
1003/**
1004 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1005 * @timer: the timer to be deactivated
1006 *
1007 * This function only differs from del_timer() on SMP: besides deactivating
1008 * the timer it also makes sure the handler has finished executing on other
1009 * CPUs.
1010 *
1011 * Synchronization rules: Callers must prevent restarting of the timer,
1012 * otherwise this function is meaningless. It must not be called from
1013 * interrupt contexts unless the timer is an irqsafe one. The caller must
1014 * not hold locks which would prevent completion of the timer's
1015 * handler. The timer's handler must not call add_timer_on(). Upon exit the
1016 * timer is not queued and the handler is not running on any CPU.
1017 *
1018 * Note: For !irqsafe timers, you must not hold locks that are held in
1019 *   interrupt context while calling this function. Even if the lock has
1020 *   nothing to do with the timer in question.  Here's why:
1021 *
1022 *    CPU0                             CPU1
1023 *    ----                             ----
1024 *                                   <SOFTIRQ>
1025 *                                   call_timer_fn();
1026 *                                     base->running_timer = mytimer;
1027 *  spin_lock_irq(somelock);
1028 *                                     <IRQ>
1029 *                                        spin_lock(somelock);
1030 *  del_timer_sync(mytimer);
1031 *   while (base->running_timer == mytimer);
1032 *
1033 * Now del_timer_sync() will never return and never release somelock.
1034 * The interrupt on the other CPU is waiting to grab somelock but
1035 * it has interrupted the softirq that CPU0 is waiting to finish.
1036 *
1037 * The function returns whether it has deactivated a pending timer or not.
1038 */
1039int del_timer_sync(struct timer_list *timer)
1040{
1041#ifdef CONFIG_LOCKDEP
1042        unsigned long flags;
1043
1044        /*
1045         * If lockdep gives a backtrace here, please reference
1046         * the synchronization rules above.
1047         */
1048        local_irq_save(flags);
1049        lock_map_acquire(&timer->lockdep_map);
1050        lock_map_release(&timer->lockdep_map);
1051        local_irq_restore(flags);
1052#endif
1053        /*
 1054         * Don't use this function from hardirq context (unless the timer
 1055         * is irqsafe), because it could lead to deadlock.
1056         */
1057        WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
1058        for (;;) {
1059                int ret = try_to_del_timer_sync(timer);
1060                if (ret >= 0)
1061                        return ret;
1062                cpu_relax();
1063        }
1064}
1065EXPORT_SYMBOL(del_timer_sync);
1066#endif
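/*
 * Editor's example (illustrative sketch, not part of the original file):
 * typical teardown ordering around del_timer_sync().  The caller first
 * prevents re-arming (here via a hypothetical "my_stopped" flag that the
 * callback checks before calling mod_timer()), then waits for a possibly
 * running handler.
 */
static bool my_stopped;
static struct timer_list my_status_timer;

static void my_shutdown(void)
{
        my_stopped = true;      /* the callback must not re-arm after this */
        del_timer_sync(&my_status_timer);
        /* From here on the handler is guaranteed not to be running. */
}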
1067
1068static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1069{
1070        /* cascade all the timers from tv up one level */
1071        struct timer_list *timer, *tmp;
1072        struct list_head tv_list;
1073
1074        list_replace_init(tv->vec + index, &tv_list);
1075
1076        /*
1077         * We are removing _all_ timers from the list, so we
1078         * don't have to detach them individually.
1079         */
1080        list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1081                BUG_ON(tbase_get_base(timer->base) != base);
1082                /* No accounting, while moving them */
1083                __internal_add_timer(base, timer);
1084        }
1085
1086        return index;
1087}
1088
1089static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1090                          unsigned long data)
1091{
1092        int preempt_count = preempt_count();
1093
1094#ifdef CONFIG_LOCKDEP
1095        /*
1096         * It is permissible to free the timer from inside the
 1097         * function that is called from it; we need to take this into
1098         * account for lockdep too. To avoid bogus "held lock freed"
1099         * warnings as well as problems when looking into
1100         * timer->lockdep_map, make a copy and use that here.
1101         */
1102        struct lockdep_map lockdep_map;
1103
1104        lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
1105#endif
1106        /*
1107         * Couple the lock chain with the lock chain at
1108         * del_timer_sync() by acquiring the lock_map around the fn()
1109         * call here and in del_timer_sync().
1110         */
1111        lock_map_acquire(&lockdep_map);
1112
1113        trace_timer_expire_entry(timer);
1114        fn(data);
1115        trace_timer_expire_exit(timer);
1116
1117        lock_map_release(&lockdep_map);
1118
1119        if (preempt_count != preempt_count()) {
1120                WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1121                          fn, preempt_count, preempt_count());
1122                /*
1123                 * Restore the preempt count. That gives us a decent
1124                 * chance to survive and extract information. If the
1125                 * callback kept a lock held, bad luck, but not worse
1126                 * than the BUG() we had.
1127                 */
1128                preempt_count() = preempt_count;
1129        }
1130}
1131
1132#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
1133
1134/**
1135 * __run_timers - run all expired timers (if any) on this CPU.
1136 * @base: the timer vector to be processed.
1137 *
1138 * This function cascades all vectors and executes all expired timer
1139 * vectors.
1140 */
1141static inline void __run_timers(struct tvec_base *base)
1142{
1143        struct timer_list *timer;
1144
1145        spin_lock_irq(&base->lock);
1146        while (time_after_eq(jiffies, base->timer_jiffies)) {
1147                struct list_head work_list;
1148                struct list_head *head = &work_list;
1149                int index = base->timer_jiffies & TVR_MASK;
1150
1151                /*
1152                 * Cascade timers:
1153                 */
1154                if (!index &&
1155                        (!cascade(base, &base->tv2, INDEX(0))) &&
1156                                (!cascade(base, &base->tv3, INDEX(1))) &&
1157                                        !cascade(base, &base->tv4, INDEX(2)))
1158                        cascade(base, &base->tv5, INDEX(3));
1159                ++base->timer_jiffies;
1160                list_replace_init(base->tv1.vec + index, &work_list);
1161                while (!list_empty(head)) {
1162                        void (*fn)(unsigned long);
1163                        unsigned long data;
1164                        bool irqsafe;
1165
 1166                        timer = list_first_entry(head, struct timer_list, entry);
1167                        fn = timer->function;
1168                        data = timer->data;
1169                        irqsafe = tbase_get_irqsafe(timer->base);
1170
1171                        timer_stats_account_timer(timer);
1172
1173                        base->running_timer = timer;
1174                        detach_expired_timer(timer, base);
1175
1176                        if (irqsafe) {
1177                                spin_unlock(&base->lock);
1178                                call_timer_fn(timer, fn, data);
1179                                spin_lock(&base->lock);
1180                        } else {
1181                                spin_unlock_irq(&base->lock);
1182                                call_timer_fn(timer, fn, data);
1183                                spin_lock_irq(&base->lock);
1184                        }
1185                }
1186        }
1187        base->running_timer = NULL;
1188        spin_unlock_irq(&base->lock);
1189}
1190
1191#ifdef CONFIG_NO_HZ
1192/*
1193 * Find out when the next timer event is due to happen. This
1194 * is used on S/390 to stop all activity when a CPU is idle.
1195 * This function needs to be called with interrupts disabled.
1196 */
1197static unsigned long __next_timer_interrupt(struct tvec_base *base)
1198{
1199        unsigned long timer_jiffies = base->timer_jiffies;
1200        unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1201        int index, slot, array, found = 0;
1202        struct timer_list *nte;
1203        struct tvec *varray[4];
1204
1205        /* Look for timer events in tv1. */
1206        index = slot = timer_jiffies & TVR_MASK;
1207        do {
1208                list_for_each_entry(nte, base->tv1.vec + slot, entry) {
1209                        if (tbase_get_deferrable(nte->base))
1210                                continue;
1211
1212                        found = 1;
1213                        expires = nte->expires;
1214                        /* Look at the cascade bucket(s)? */
1215                        if (!index || slot < index)
1216                                goto cascade;
1217                        return expires;
1218                }
1219                slot = (slot + 1) & TVR_MASK;
1220        } while (slot != index);
1221
1222cascade:
1223        /* Calculate the next cascade event */
1224        if (index)
1225                timer_jiffies += TVR_SIZE - index;
1226        timer_jiffies >>= TVR_BITS;
1227
1228        /* Check tv2-tv5. */
1229        varray[0] = &base->tv2;
1230        varray[1] = &base->tv3;
1231        varray[2] = &base->tv4;
1232        varray[3] = &base->tv5;
1233
1234        for (array = 0; array < 4; array++) {
1235                struct tvec *varp = varray[array];
1236
1237                index = slot = timer_jiffies & TVN_MASK;
1238                do {
1239                        list_for_each_entry(nte, varp->vec + slot, entry) {
1240                                if (tbase_get_deferrable(nte->base))
1241                                        continue;
1242
1243                                found = 1;
1244                                if (time_before(nte->expires, expires))
1245                                        expires = nte->expires;
1246                        }
1247                        /*
 1248                         * Are we still searching for the first timer, or are
 1249                         * we looking up the cascade buckets?
1250                         */
1251                        if (found) {
1252                                /* Look at the cascade bucket(s)? */
1253                                if (!index || slot < index)
1254                                        break;
1255                                return expires;
1256                        }
1257                        slot = (slot + 1) & TVN_MASK;
1258                } while (slot != index);
1259
1260                if (index)
1261                        timer_jiffies += TVN_SIZE - index;
1262                timer_jiffies >>= TVN_BITS;
1263        }
1264        return expires;
1265}
1266
1267/*
 1268 * Check if the next hrtimer event is before the next timer wheel
1269 * event:
1270 */
1271static unsigned long cmp_next_hrtimer_event(unsigned long now,
1272                                            unsigned long expires)
1273{
1274        ktime_t hr_delta = hrtimer_get_next_event();
1275        struct timespec tsdelta;
1276        unsigned long delta;
1277
1278        if (hr_delta.tv64 == KTIME_MAX)
1279                return expires;
1280
1281        /*
1282         * Expired timer available, let it expire in the next tick
1283         */
1284        if (hr_delta.tv64 <= 0)
1285                return now + 1;
1286
1287        tsdelta = ktime_to_timespec(hr_delta);
1288        delta = timespec_to_jiffies(&tsdelta);
1289
1290        /*
1291         * Limit the delta to the max value, which is checked in
1292         * tick_nohz_stop_sched_tick():
1293         */
1294        if (delta > NEXT_TIMER_MAX_DELTA)
1295                delta = NEXT_TIMER_MAX_DELTA;
1296
1297        /*
 1298         * Take rounding errors into account and make sure that it
 1299         * expires in the next tick. Otherwise we go into an endless
 1300         * ping-pong due to tick_nohz_stop_sched_tick() retriggering
 1301         * the timer softirq.
1302         */
1303        if (delta < 1)
1304                delta = 1;
1305        now += delta;
1306        if (time_before(now, expires))
1307                return now;
1308        return expires;
1309}
1310
1311/**
1312 * get_next_timer_interrupt - return the jiffy of the next pending timer
1313 * @now: current time (in jiffies)
1314 */
1315unsigned long get_next_timer_interrupt(unsigned long now)
1316{
1317        struct tvec_base *base = __this_cpu_read(tvec_bases);
1318        unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
1319
1320        /*
1321         * Pretend that there is no timer pending if the cpu is offline.
1322         * Possible pending timers will be migrated later to an active cpu.
1323         */
1324        if (cpu_is_offline(smp_processor_id()))
1325                return expires;
1326
1327        spin_lock(&base->lock);
1328        if (base->active_timers) {
1329                if (time_before_eq(base->next_timer, base->timer_jiffies))
1330                        base->next_timer = __next_timer_interrupt(base);
1331                expires = base->next_timer;
1332        }
1333        spin_unlock(&base->lock);
1334
1335        if (time_before_eq(expires, now))
1336                return now;
1337
1338        return cmp_next_hrtimer_event(now, expires);
1339}
1340#endif
1341
1342/*
1343 * Called from the timer interrupt handler to charge one tick to the current
1344 * process.  user_tick is 1 if the tick is user time, 0 for system.
1345 */
1346void update_process_times(int user_tick)
1347{
1348        struct task_struct *p = current;
1349        int cpu = smp_processor_id();
1350
1351        /* Note: this timer irq context must be accounted for as well. */
1352        account_process_tick(p, user_tick);
1353        run_local_timers();
1354        rcu_check_callbacks(cpu, user_tick);
1355#ifdef CONFIG_IRQ_WORK
1356        if (in_irq())
1357                irq_work_run();
1358#endif
1359        scheduler_tick();
1360        run_posix_cpu_timers(p);
1361}
1362
1363/*
1364 * This function runs timers and the timer-tq in bottom half context.
1365 */
1366static void run_timer_softirq(struct softirq_action *h)
1367{
1368        struct tvec_base *base = __this_cpu_read(tvec_bases);
1369
1370        hrtimer_run_pending();
1371
1372        if (time_after_eq(jiffies, base->timer_jiffies))
1373                __run_timers(base);
1374}
1375
1376/*
1377 * Called by the local, per-CPU timer interrupt on SMP.
1378 */
1379void run_local_timers(void)
1380{
1381        hrtimer_run_queues();
1382        raise_softirq(TIMER_SOFTIRQ);
1383}
1384
1385#ifdef __ARCH_WANT_SYS_ALARM
1386
1387/*
1388 * For backwards compatibility?  This can be done in libc so Alpha
1389 * and all newer ports shouldn't need it.
1390 */
1391SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1392{
1393        return alarm_setitimer(seconds);
1394}
1395
1396#endif
1397
1398/**
1399 * sys_getpid - return the thread group id of the current process
1400 *
1401 * Note, despite the name, this returns the tgid not the pid.  The tgid and
1402 * the pid are identical unless CLONE_THREAD was specified on clone() in
1403 * which case the tgid is the same in all threads of the same group.
1404 *
1405 * This is SMP safe as current->tgid does not change.
1406 */
1407SYSCALL_DEFINE0(getpid)
1408{
1409        return task_tgid_vnr(current);
1410}
1411
1412/*
1413 * Accessing ->real_parent is not SMP-safe, it could
1414 * change from under us. However, we can use a stale
1415 * value of ->real_parent under rcu_read_lock(), see
1416 * release_task()->call_rcu(delayed_put_task_struct).
1417 */
1418SYSCALL_DEFINE0(getppid)
1419{
1420        int pid;
1421
1422        rcu_read_lock();
1423        pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1424        rcu_read_unlock();
1425
1426        return pid;
1427}
1428
1429SYSCALL_DEFINE0(getuid)
1430{
1431        /* Only we change this so SMP safe */
1432        return from_kuid_munged(current_user_ns(), current_uid());
1433}
1434
1435SYSCALL_DEFINE0(geteuid)
1436{
1437        /* Only we change this so SMP safe */
1438        return from_kuid_munged(current_user_ns(), current_euid());
1439}
1440
1441SYSCALL_DEFINE0(getgid)
1442{
1443        /* Only we change this so SMP safe */
1444        return from_kgid_munged(current_user_ns(), current_gid());
1445}
1446
1447SYSCALL_DEFINE0(getegid)
1448{
1449        /* Only we change this so SMP safe */
1450        return from_kgid_munged(current_user_ns(), current_egid());
1451}
1452
1453static void process_timeout(unsigned long __data)
1454{
1455        wake_up_process((struct task_struct *)__data);
1456}
1457
1458/**
1459 * schedule_timeout - sleep until timeout
1460 * @timeout: timeout value in jiffies
1461 *
1462 * Make the current task sleep until @timeout jiffies have
1463 * elapsed. The routine will return immediately unless
1464 * the current task state has been set (see set_current_state()).
1465 *
1466 * You can set the task state as follows -
1467 *
1468 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1469 * pass before the routine returns. The routine will return 0
1470 *
1471 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1472 * delivered to the current task. In this case the remaining time
1473 * in jiffies will be returned, or 0 if the timer expired in time
1474 *
1475 * The current task state is guaranteed to be TASK_RUNNING when this
1476 * routine returns.
1477 *
1478 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1479 * the CPU away without a bound on the timeout. In this case the return
1480 * value will be %MAX_SCHEDULE_TIMEOUT.
1481 *
1482 * In all cases the return value is guaranteed to be non-negative.
1483 */
1484signed long __sched schedule_timeout(signed long timeout)
1485{
1486        struct timer_list timer;
1487        unsigned long expire;
1488
1489        switch (timeout)
1490        {
1491        case MAX_SCHEDULE_TIMEOUT:
1492                /*
1493                 * These two special cases are useful to be comfortable
1494                 * in the caller. Nothing more. We could take
 1495                 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
 1496                 * but I'd like to return a valid offset (>=0) to allow
 1497                 * the caller to do everything it wants with the retval.
1498                 */
1499                schedule();
1500                goto out;
1501        default:
1502                /*
 1503                 * Another bit of paranoia. Note that the retval will be
 1504                 * 0, since no piece of the kernel is supposed to check
 1505                 * for a negative retval of schedule_timeout() (it
 1506                 * should never happen anyway). You just have the printk()
 1507                 * that will tell you if something has gone wrong and where.
1508                 */
1509                if (timeout < 0) {
1510                        printk(KERN_ERR "schedule_timeout: wrong timeout "
1511                                "value %lx\n", timeout);
1512                        dump_stack();
1513                        current->state = TASK_RUNNING;
1514                        goto out;
1515                }
1516        }
1517
1518        expire = timeout + jiffies;
1519
1520        setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1521        __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1522        schedule();
1523        del_singleshot_timer_sync(&timer);
1524
1525        /* Remove the timer from the object tracker */
1526        destroy_timer_on_stack(&timer);
1527
1528        timeout = expire - jiffies;
1529
1530 out:
1531        return timeout < 0 ? 0 : timeout;
1532}
1533EXPORT_SYMBOL(schedule_timeout);
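/*
 * Editor's example (illustrative sketch, not part of the original file):
 * the usual pattern around schedule_timeout() - set the task state first,
 * then sleep; the return value is the remaining time if woken early.
 */
static signed long my_wait_about_a_second(void)
{
        set_current_state(TASK_INTERRUPTIBLE);
        /* Returns 0 on full expiry, or the jiffies left if woken early. */
        return schedule_timeout(HZ);
}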
1534
1535/*
1536 * We can use __set_current_state() here because schedule_timeout() calls
1537 * schedule() unconditionally.
1538 */
1539signed long __sched schedule_timeout_interruptible(signed long timeout)
1540{
1541        __set_current_state(TASK_INTERRUPTIBLE);
1542        return schedule_timeout(timeout);
1543}
1544EXPORT_SYMBOL(schedule_timeout_interruptible);
1545
1546signed long __sched schedule_timeout_killable(signed long timeout)
1547{
1548        __set_current_state(TASK_KILLABLE);
1549        return schedule_timeout(timeout);
1550}
1551EXPORT_SYMBOL(schedule_timeout_killable);
1552
1553signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1554{
1555        __set_current_state(TASK_UNINTERRUPTIBLE);
1556        return schedule_timeout(timeout);
1557}
1558EXPORT_SYMBOL(schedule_timeout_uninterruptible);
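/*
 * Editorial example (not part of the original source): choosing between
 * the wrappers above.  schedule_timeout_killable() sleeps like the
 * uninterruptible variant but still reacts to fatal signals, so a long
 * retry loop does not become unkillable.  hw_ready() and the 100 ms
 * retry period are hypothetical.
 */
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/sched.h>

static int my_wait_for_hw(int (*hw_ready)(void), int max_tries)
{
	while (max_tries--) {
		if (hw_ready())
			return 0;
		/* A non-zero return means we were woken early by a fatal signal. */
		if (schedule_timeout_killable(msecs_to_jiffies(100)))
			return -EINTR;
	}
	return -ETIMEDOUT;
}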
1559
1560/* Thread ID - the internal kernel "pid" */
1561SYSCALL_DEFINE0(gettid)
1562{
1563        return task_pid_vnr(current);
1564}
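/*
 * Editorial example (not part of the original source): gettid has long
 * lacked a libc wrapper, so userspace typically reaches it through
 * syscall().  In a single-threaded process the result equals getpid().
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	printf("tid=%ld pid=%ld\n", (long)syscall(SYS_gettid), (long)getpid());
	return 0;
}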
1565
1566/**
1567 * do_sysinfo - fill in sysinfo struct
1568 * @info: pointer to buffer to fill
1569 */
1570int do_sysinfo(struct sysinfo *info)
1571{
1572        unsigned long mem_total, sav_total;
1573        unsigned int mem_unit, bitcount;
1574        struct timespec tp;
1575
1576        memset(info, 0, sizeof(struct sysinfo));
1577
1578        ktime_get_ts(&tp);
1579        monotonic_to_bootbased(&tp);
1580        info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1581
1582        get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1583
1584        info->procs = nr_threads;
1585
1586        si_meminfo(info);
1587        si_swapinfo(info);
1588
1589        /*
1590         * If the sum of all the available memory (i.e. ram + swap)
1591         * is less than can be stored in a 32 bit unsigned long then
1592         * we can be binary compatible with 2.2.x kernels.  If not,
1593         * well, in that case 2.2.x was broken anyways...
1594         *
1595         *  -Erik Andersen <andersee@debian.org>
1596         */
1597
1598        mem_total = info->totalram + info->totalswap;
1599        if (mem_total < info->totalram || mem_total < info->totalswap)
1600                goto out;
1601        bitcount = 0;
1602        mem_unit = info->mem_unit;
1603        while (mem_unit > 1) {
1604                bitcount++;
1605                mem_unit >>= 1;
1606                sav_total = mem_total;
1607                mem_total <<= 1;
1608                if (mem_total < sav_total)
1609                        goto out;
1610        }
1611
1612        /*
1613         * If mem_total did not overflow, multiply all memory values by
1614         * info->mem_unit and set it to 1.  This leaves things compatible
1615         * with 2.2.x, and also retains compatibility with earlier 2.4.x
1616         * kernels...
1617         */
1618
1619        info->mem_unit = 1;
1620        info->totalram <<= bitcount;
1621        info->freeram <<= bitcount;
1622        info->sharedram <<= bitcount;
1623        info->bufferram <<= bitcount;
1624        info->totalswap <<= bitcount;
1625        info->freeswap <<= bitcount;
1626        info->totalhigh <<= bitcount;
1627        info->freehigh <<= bitcount;
1628
1629out:
1630        return 0;
1631}
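/*
 * Editorial example (not part of the original source): a standalone,
 * userspace re-statement of the overflow-checked scaling above, purely
 * for illustration.  With unit == 4096 the loop runs 12 times, so the
 * total is shifted left by 12 and the unit collapses to 1 (bytes); if
 * doubling would overflow, the original unit is kept instead.
 */
#include <stdio.h>

static void scale_to_bytes(unsigned long *total, unsigned int *unit)
{
	unsigned int bitcount = 0;
	unsigned long doubled = *total, sav;
	unsigned int u = *unit;

	while (u > 1) {
		bitcount++;
		u >>= 1;
		sav = doubled;
		doubled <<= 1;
		if (doubled < sav)
			return;		/* would overflow: keep the original unit */
	}
	*total <<= bitcount;
	*unit = 1;
}

int main(void)
{
	unsigned long total = 262144;	/* e.g. 262144 pages ... */
	unsigned int unit = 4096;	/* ... of 4 KiB each */

	scale_to_bytes(&total, &unit);
	printf("%lu bytes (unit=%u)\n", total, unit);	/* 1073741824 bytes */
	return 0;
}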
1632
1633SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
1634{
1635        struct sysinfo val;
1636
1637        do_sysinfo(&val);
1638
1639        if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1640                return -EFAULT;
1641
1642        return 0;
1643}
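/*
 * Editorial example (not part of the original source): a minimal
 * userspace consumer of this syscall via the glibc sysinfo() wrapper.
 * The memory fields must still be multiplied by mem_unit, which
 * do_sysinfo() may or may not have folded down to 1.
 */
#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
	struct sysinfo si;

	if (sysinfo(&si) != 0) {
		perror("sysinfo");
		return 1;
	}
	printf("uptime:    %ld s\n", si.uptime);
	printf("total ram: %llu bytes\n",
	       (unsigned long long)si.totalram * si.mem_unit);
	printf("free swap: %llu bytes\n",
	       (unsigned long long)si.freeswap * si.mem_unit);
	return 0;
}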
1644
1645static int __cpuinit init_timers_cpu(int cpu)
1646{
1647        int j;
1648        struct tvec_base *base;
1649        static char __cpuinitdata tvec_base_done[NR_CPUS];
1650
1651        if (!tvec_base_done[cpu]) {
1652                static char boot_done;
1653
1654                if (boot_done) {
1655                        /*
1656                         * The APs use this path later in boot
1657                         */
1658                        base = kmalloc_node(sizeof(*base),
1659                                                GFP_KERNEL | __GFP_ZERO,
1660                                                cpu_to_node(cpu));
1661                        if (!base)
1662                                return -ENOMEM;
1663
1664                        /* Make sure that tvec_base is 2 byte aligned */
1665                        if (tbase_get_deferrable(base)) {
1666                                WARN_ON(1);
1667                                kfree(base);
1668                                return -ENOMEM;
1669                        }
1670                        per_cpu(tvec_bases, cpu) = base;
1671                } else {
1672                        /*
1673                         * This is for the boot CPU - we use compile-time
1674                         * static initialisation because per-cpu memory isn't
1675                         * ready yet and because the memory allocators are not
1676                         * initialised either.
1677                         */
1678                        boot_done = 1;
1679                        base = &boot_tvec_bases;
1680                }
1681                tvec_base_done[cpu] = 1;
1682        } else {
1683                base = per_cpu(tvec_bases, cpu);
1684        }
1685
1686        spin_lock_init(&base->lock);
1687
1688        for (j = 0; j < TVN_SIZE; j++) {
1689                INIT_LIST_HEAD(base->tv5.vec + j);
1690                INIT_LIST_HEAD(base->tv4.vec + j);
1691                INIT_LIST_HEAD(base->tv3.vec + j);
1692                INIT_LIST_HEAD(base->tv2.vec + j);
1693        }
1694        for (j = 0; j < TVR_SIZE; j++)
1695                INIT_LIST_HEAD(base->tv1.vec + j);
1696
1697        base->timer_jiffies = jiffies;
1698        base->next_timer = base->timer_jiffies;
1699        base->active_timers = 0;
1700        return 0;
1701}
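/*
 * Editorial aside (not part of the original source): the WARN_ON above
 * guards the "flags in the low pointer bits" trick used for timer->base.
 * A minimal, generic sketch of that idiom, assuming the pointed-to
 * object is at least 4-byte aligned so bits 0-1 are free; the names
 * below are hypothetical.
 */
#include <stdint.h>

#define MY_PTR_FLAG_MASK	0x3UL	/* two usable low bits */

static inline void *my_ptr_pack(void *p, unsigned long flags)
{
	return (void *)((uintptr_t)p | (flags & MY_PTR_FLAG_MASK));
}

static inline void *my_ptr_base(void *packed)
{
	return (void *)((uintptr_t)packed & ~MY_PTR_FLAG_MASK);
}

static inline unsigned long my_ptr_flags(void *packed)
{
	return (uintptr_t)packed & MY_PTR_FLAG_MASK;
}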
1702
1703#ifdef CONFIG_HOTPLUG_CPU
1704static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1705{
1706        struct timer_list *timer;
1707
1708        while (!list_empty(head)) {
1709                timer = list_first_entry(head, struct timer_list, entry);
1710                /* We ignore the accounting on the dying cpu */
1711                detach_timer(timer, false);
1712                timer_set_base(timer, new_base);
1713                internal_add_timer(new_base, timer);
1714        }
1715}
1716
1717static void __cpuinit migrate_timers(int cpu)
1718{
1719        struct tvec_base *old_base;
1720        struct tvec_base *new_base;
1721        int i;
1722
1723        BUG_ON(cpu_online(cpu));
1724        old_base = per_cpu(tvec_bases, cpu);
1725        new_base = get_cpu_var(tvec_bases);
1726        /*
1727         * The caller is globally serialized and nobody else
1728         * takes two locks at once, deadlock is not possible.
1729         */
1730        spin_lock_irq(&new_base->lock);
1731        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1732
1733        BUG_ON(old_base->running_timer);
1734
1735        for (i = 0; i < TVR_SIZE; i++)
1736                migrate_timer_list(new_base, old_base->tv1.vec + i);
1737        for (i = 0; i < TVN_SIZE; i++) {
1738                migrate_timer_list(new_base, old_base->tv2.vec + i);
1739                migrate_timer_list(new_base, old_base->tv3.vec + i);
1740                migrate_timer_list(new_base, old_base->tv4.vec + i);
1741                migrate_timer_list(new_base, old_base->tv5.vec + i);
1742        }
1743
1744        spin_unlock(&old_base->lock);
1745        spin_unlock_irq(&new_base->lock);
1746        put_cpu_var(tvec_bases);
1747}
1748#endif /* CONFIG_HOTPLUG_CPU */
1749
1750static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1751                                unsigned long action, void *hcpu)
1752{
1753        long cpu = (long)hcpu;
1754        int err;
1755
1756        switch(action) {
1757        case CPU_UP_PREPARE:
1758        case CPU_UP_PREPARE_FROZEN:
1759                err = init_timers_cpu(cpu);
1760                if (err < 0)
1761                        return notifier_from_errno(err);
1762                break;
1763#ifdef CONFIG_HOTPLUG_CPU
1764        case CPU_DEAD:
1765        case CPU_DEAD_FROZEN:
1766                migrate_timers(cpu);
1767                break;
1768#endif
1769        default:
1770                break;
1771        }
1772        return NOTIFY_OK;
1773}
1774
1775static struct notifier_block __cpuinitdata timers_nb = {
1776        .notifier_call  = timer_cpu_notify,
1777};
1778
1779
1780void __init init_timers(void)
1781{
1782        int err;
1783
1784        /* ensure there are enough low bits for flags in timer->base pointer */
1785        BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
1786
1787        err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1788                               (void *)(long)smp_processor_id());
1789        init_timer_stats();
1790
1791        BUG_ON(err != NOTIFY_OK);
1792        register_cpu_notifier(&timers_nb);
1793        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1794}
1795
1796/**
1797 * msleep - sleep safely even with waitqueue interruptions
1798 * @msecs: Time in milliseconds to sleep for
1799 */
1800void msleep(unsigned int msecs)
1801{
1802        unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1803
1804        while (timeout)
1805                timeout = schedule_timeout_uninterruptible(timeout);
1806}
1807
1808EXPORT_SYMBOL(msleep);
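/*
 * Editorial example (not part of the original source): msleep() may
 * sleep longer than requested (at least one extra jiffy), so it suits
 * coarse delays in process context; atomic contexts must spin with
 * mdelay() instead.  The 20 ms settle time is hypothetical.
 */
#include <linux/delay.h>

static void my_device_settle(void)
{
	msleep(20);	/* assumed power-on settle time from a datasheet */
}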
1809
1810/**
1811 * msleep_interruptible - sleep waiting for signals
1812 * @msecs: Time in milliseconds to sleep for
1813 */
1814unsigned long msleep_interruptible(unsigned int msecs)
1815{
1816        unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1817
1818        while (timeout && !signal_pending(current))
1819                timeout = schedule_timeout_interruptible(timeout);
1820        return jiffies_to_msecs(timeout);
1821}
1822
1823EXPORT_SYMBOL(msleep_interruptible);
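/*
 * Editorial example (not part of the original source): unlike msleep(),
 * this variant returns early when a signal is pending and reports how
 * much of the requested time was left.  The 500 ms value is arbitrary.
 */
#include <linux/delay.h>
#include <linux/printk.h>

static void my_interruptible_pause(void)
{
	unsigned long left = msleep_interruptible(500);

	if (left)
		pr_info("pause cut short, ~%lu ms remaining\n", left);
}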
1824
1825static int __sched do_usleep_range(unsigned long min, unsigned long max)
1826{
1827        ktime_t kmin;
1828        unsigned long delta;
1829
1830        kmin = ktime_set(0, min * NSEC_PER_USEC);
1831        delta = (max - min) * NSEC_PER_USEC;
1832        return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1833}
1834
1835/**
1836 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1837 * @min: Minimum time in usecs to sleep
1838 * @max: Maximum time in usecs to sleep
1839 */
1840void usleep_range(unsigned long min, unsigned long max)
1841{
1842        __set_current_state(TASK_UNINTERRUPTIBLE);
1843        do_usleep_range(min, max);
1844}
1845EXPORT_SYMBOL(usleep_range);
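/*
 * Editorial example (not part of the original source): for sleeps in
 * roughly the 10 us - 20 ms range, usleep_range() lets the scheduler
 * coalesce wakeups instead of busy-waiting as udelay() would.  The
 * 100-200 us window is hypothetical.
 */
#include <linux/delay.h>

static void my_register_latch_delay(void)
{
	usleep_range(100, 200);	/* any point in the window is acceptable */
}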
1846