linux/kernel/posix-cpu-timers.c
<<
>>
Prefs
   1/*
   2 * Implement CPU time clocks for the POSIX clock interface.
   3 */
   4
   5#include <linux/sched.h>
   6#include <linux/posix-timers.h>
   7#include <linux/errno.h>
   8#include <linux/math64.h>
   9#include <asm/uaccess.h>
  10
  11static int check_clock(const clockid_t which_clock)
  12{
  13        int error = 0;
  14        struct task_struct *p;
  15        const pid_t pid = CPUCLOCK_PID(which_clock);
  16
  17        if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
  18                return -EINVAL;
  19
  20        if (pid == 0)
  21                return 0;
  22
  23        read_lock(&tasklist_lock);
  24        p = find_task_by_vpid(pid);
  25        if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
  26                   same_thread_group(p, current) : thread_group_leader(p))) {
  27                error = -EINVAL;
  28        }
  29        read_unlock(&tasklist_lock);
  30
  31        return error;
  32}
  33
  34static inline union cpu_time_count
  35timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
  36{
  37        union cpu_time_count ret;
  38        ret.sched = 0;          /* high half always zero when .cpu used */
  39        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  40                ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
  41        } else {
  42                ret.cpu = timespec_to_cputime(tp);
  43        }
  44        return ret;
  45}
  46
  47static void sample_to_timespec(const clockid_t which_clock,
  48                               union cpu_time_count cpu,
  49                               struct timespec *tp)
  50{
  51        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
  52                *tp = ns_to_timespec(cpu.sched);
  53        else
  54                cputime_to_timespec(cpu.cpu, tp);
  55}
  56
  57static inline int cpu_time_before(const clockid_t which_clock,
  58                                  union cpu_time_count now,
  59                                  union cpu_time_count then)
  60{
  61        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  62                return now.sched < then.sched;
  63        }  else {
  64                return cputime_lt(now.cpu, then.cpu);
  65        }
  66}
  67static inline void cpu_time_add(const clockid_t which_clock,
  68                                union cpu_time_count *acc,
  69                                union cpu_time_count val)
  70{
  71        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  72                acc->sched += val.sched;
  73        }  else {
  74                acc->cpu = cputime_add(acc->cpu, val.cpu);
  75        }
  76}
  77static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
  78                                                union cpu_time_count a,
  79                                                union cpu_time_count b)
  80{
  81        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  82                a.sched -= b.sched;
  83        }  else {
  84                a.cpu = cputime_sub(a.cpu, b.cpu);
  85        }
  86        return a;
  87}
  88
  89/*
  90 * Divide and limit the result to res >= 1
  91 *
  92 * This is necessary to prevent signal delivery starvation, when the result of
  93 * the division would be rounded down to 0.
  94 */
  95static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
  96{
  97        cputime_t res = cputime_div(time, div);
  98
  99        return max_t(cputime_t, res, 1);
 100}
 101
 102/*
 103 * Update expiry time from increment, and increase overrun count,
 104 * given the current clock sample.
 105 */
 106static void bump_cpu_timer(struct k_itimer *timer,
 107                                  union cpu_time_count now)
 108{
 109        int i;
 110
 111        if (timer->it.cpu.incr.sched == 0)
 112                return;
 113
 114        if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
 115                unsigned long long delta, incr;
 116
 117                if (now.sched < timer->it.cpu.expires.sched)
 118                        return;
 119                incr = timer->it.cpu.incr.sched;
 120                delta = now.sched + incr - timer->it.cpu.expires.sched;
 121                /* Don't use (incr*2 < delta), incr*2 might overflow. */
 122                for (i = 0; incr < delta - incr; i++)
 123                        incr = incr << 1;
 124                for (; i >= 0; incr >>= 1, i--) {
 125                        if (delta < incr)
 126                                continue;
 127                        timer->it.cpu.expires.sched += incr;
 128                        timer->it_overrun += 1 << i;
 129                        delta -= incr;
 130                }
 131        } else {
 132                cputime_t delta, incr;
 133
 134                if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
 135                        return;
 136                incr = timer->it.cpu.incr.cpu;
 137                delta = cputime_sub(cputime_add(now.cpu, incr),
 138                                    timer->it.cpu.expires.cpu);
 139                /* Don't use (incr*2 < delta), incr*2 might overflow. */
 140                for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
 141                             incr = cputime_add(incr, incr);
 142                for (; i >= 0; incr = cputime_halve(incr), i--) {
 143                        if (cputime_lt(delta, incr))
 144                                continue;
 145                        timer->it.cpu.expires.cpu =
 146                                cputime_add(timer->it.cpu.expires.cpu, incr);
 147                        timer->it_overrun += 1 << i;
 148                        delta = cputime_sub(delta, incr);
 149                }
 150        }
 151}
 152
 153static inline cputime_t prof_ticks(struct task_struct *p)
 154{
 155        return cputime_add(p->utime, p->stime);
 156}
 157static inline cputime_t virt_ticks(struct task_struct *p)
 158{
 159        return p->utime;
 160}
 161static inline unsigned long long sched_ns(struct task_struct *p)
 162{
 163        return task_sched_runtime(p);
 164}
 165
 166int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 167{
 168        int error = check_clock(which_clock);
 169        if (!error) {
 170                tp->tv_sec = 0;
 171                tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
 172                if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 173                        /*
 174                         * If sched_clock is using a cycle counter, we
 175                         * don't have any idea of its true resolution
 176                         * exported, but it is much more than 1s/HZ.
 177                         */
 178                        tp->tv_nsec = 1;
 179                }
 180        }
 181        return error;
 182}
 183
 184int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 185{
 186        /*
 187         * You can never reset a CPU clock, but we check for other errors
 188         * in the call before failing with EPERM.
 189         */
 190        int error = check_clock(which_clock);
 191        if (error == 0) {
 192                error = -EPERM;
 193        }
 194        return error;
 195}
 196
 197
 198/*
 199 * Sample a per-thread clock for the given task.
 200 */
 201static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 202                            union cpu_time_count *cpu)
 203{
 204        switch (CPUCLOCK_WHICH(which_clock)) {
 205        default:
 206                return -EINVAL;
 207        case CPUCLOCK_PROF:
 208                cpu->cpu = prof_ticks(p);
 209                break;
 210        case CPUCLOCK_VIRT:
 211                cpu->cpu = virt_ticks(p);
 212                break;
 213        case CPUCLOCK_SCHED:
 214                cpu->sched = sched_ns(p);
 215                break;
 216        }
 217        return 0;
 218}
 219
 220/*
 221 * Sample a process (thread group) clock for the given group_leader task.
 222 * Must be called with tasklist_lock held for reading.
 223 * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
 224 */
 225static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 226                                         struct task_struct *p,
 227                                         union cpu_time_count *cpu)
 228{
 229        struct task_struct *t = p;
 230        switch (clock_idx) {
 231        default:
 232                return -EINVAL;
 233        case CPUCLOCK_PROF:
 234                cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
 235                do {
 236                        cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
 237                        t = next_thread(t);
 238                } while (t != p);
 239                break;
 240        case CPUCLOCK_VIRT:
 241                cpu->cpu = p->signal->utime;
 242                do {
 243                        cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
 244                        t = next_thread(t);
 245                } while (t != p);
 246                break;
 247        case CPUCLOCK_SCHED:
 248                cpu->sched = p->signal->sum_sched_runtime;
 249                /* Add in each other live thread.  */
 250                while ((t = next_thread(t)) != p) {
 251                        cpu->sched += t->se.sum_exec_runtime;
 252                }
 253                cpu->sched += sched_ns(p);
 254                break;
 255        }
 256        return 0;
 257}
 258
 259/*
 260 * Sample a process (thread group) clock for the given group_leader task.
 261 * Must be called with tasklist_lock held for reading.
 262 */
 263static int cpu_clock_sample_group(const clockid_t which_clock,
 264                                  struct task_struct *p,
 265                                  union cpu_time_count *cpu)
 266{
 267        int ret;
 268        unsigned long flags;
 269        spin_lock_irqsave(&p->sighand->siglock, flags);
 270        ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
 271                                            cpu);
 272        spin_unlock_irqrestore(&p->sighand->siglock, flags);
 273        return ret;
 274}
 275
 276
 277int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 278{
 279        const pid_t pid = CPUCLOCK_PID(which_clock);
 280        int error = -EINVAL;
 281        union cpu_time_count rtn;
 282
 283        if (pid == 0) {
 284                /*
 285                 * Special case constant value for our own clocks.
 286                 * We don't have to do any lookup to find ourselves.
 287                 */
 288                if (CPUCLOCK_PERTHREAD(which_clock)) {
 289                        /*
 290                         * Sampling just ourselves we can do with no locking.
 291                         */
 292                        error = cpu_clock_sample(which_clock,
 293                                                 current, &rtn);
 294                } else {
 295                        read_lock(&tasklist_lock);
 296                        error = cpu_clock_sample_group(which_clock,
 297                                                       current, &rtn);
 298                        read_unlock(&tasklist_lock);
 299                }
 300        } else {
 301                /*
 302                 * Find the given PID, and validate that the caller
 303                 * should be able to see it.
 304                 */
 305                struct task_struct *p;
 306                rcu_read_lock();
 307                p = find_task_by_vpid(pid);
 308                if (p) {
 309                        if (CPUCLOCK_PERTHREAD(which_clock)) {
 310                                if (same_thread_group(p, current)) {
 311                                        error = cpu_clock_sample(which_clock,
 312                                                                 p, &rtn);
 313                                }
 314                        } else {
 315                                read_lock(&tasklist_lock);
 316                                if (thread_group_leader(p) && p->signal) {
 317                                        error =
 318                                            cpu_clock_sample_group(which_clock,
 319                                                                   p, &rtn);
 320                                }
 321                                read_unlock(&tasklist_lock);
 322                        }
 323                }
 324                rcu_read_unlock();
 325        }
 326
 327        if (error)
 328                return error;
 329        sample_to_timespec(which_clock, rtn, tp);
 330        return 0;
 331}
 332
 333
 334/*
 335 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 336 * This is called from sys_timer_create with the new timer already locked.
 337 */
 338int posix_cpu_timer_create(struct k_itimer *new_timer)
 339{
 340        int ret = 0;
 341        const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
 342        struct task_struct *p;
 343
 344        if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
 345                return -EINVAL;
 346
 347        INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 348        new_timer->it.cpu.incr.sched = 0;
 349        new_timer->it.cpu.expires.sched = 0;
 350
 351        read_lock(&tasklist_lock);
 352        if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
 353                if (pid == 0) {
 354                        p = current;
 355                } else {
 356                        p = find_task_by_vpid(pid);
 357                        if (p && !same_thread_group(p, current))
 358                                p = NULL;
 359                }
 360        } else {
 361                if (pid == 0) {
 362                        p = current->group_leader;
 363                } else {
 364                        p = find_task_by_vpid(pid);
 365                        if (p && !thread_group_leader(p))
 366                                p = NULL;
 367                }
 368        }
 369        new_timer->it.cpu.task = p;
 370        if (p) {
 371                get_task_struct(p);
 372        } else {
 373                ret = -EINVAL;
 374        }
 375        read_unlock(&tasklist_lock);
 376
 377        return ret;
 378}
 379
 380/*
 381 * Clean up a CPU-clock timer that is about to be destroyed.
 382 * This is called from timer deletion with the timer already locked.
 383 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 384 * and try again.  (This happens when the timer is in the middle of firing.)
 385 */
 386int posix_cpu_timer_del(struct k_itimer *timer)
 387{
 388        struct task_struct *p = timer->it.cpu.task;
 389        int ret = 0;
 390
 391        if (likely(p != NULL)) {
 392                read_lock(&tasklist_lock);
 393                if (unlikely(p->signal == NULL)) {
 394                        /*
 395                         * We raced with the reaping of the task.
 396                         * The deletion should have cleared us off the list.
 397                         */
 398                        BUG_ON(!list_empty(&timer->it.cpu.entry));
 399                } else {
 400                        spin_lock(&p->sighand->siglock);
 401                        if (timer->it.cpu.firing)
 402                                ret = TIMER_RETRY;
 403                        else
 404                                list_del(&timer->it.cpu.entry);
 405                        spin_unlock(&p->sighand->siglock);
 406                }
 407                read_unlock(&tasklist_lock);
 408
 409                if (!ret)
 410                        put_task_struct(p);
 411        }
 412
 413        return ret;
 414}
 415
 416/*
 417 * Clean out CPU timers still ticking when a thread exited.  The task
 418 * pointer is cleared, and the expiry time is replaced with the residual
 419 * time for later timer_gettime calls to return.
 420 * This must be called with the siglock held.
 421 */
 422static void cleanup_timers(struct list_head *head,
 423                           cputime_t utime, cputime_t stime,
 424                           unsigned long long sum_exec_runtime)
 425{
 426        struct cpu_timer_list *timer, *next;
 427        cputime_t ptime = cputime_add(utime, stime);
 428
 429        list_for_each_entry_safe(timer, next, head, entry) {
 430                list_del_init(&timer->entry);
 431                if (cputime_lt(timer->expires.cpu, ptime)) {
 432                        timer->expires.cpu = cputime_zero;
 433                } else {
 434                        timer->expires.cpu = cputime_sub(timer->expires.cpu,
 435                                                         ptime);
 436                }
 437        }
 438
 439        ++head;
 440        list_for_each_entry_safe(timer, next, head, entry) {
 441                list_del_init(&timer->entry);
 442                if (cputime_lt(timer->expires.cpu, utime)) {
 443                        timer->expires.cpu = cputime_zero;
 444                } else {
 445                        timer->expires.cpu = cputime_sub(timer->expires.cpu,
 446                                                         utime);
 447                }
 448        }
 449
 450        ++head;
 451        list_for_each_entry_safe(timer, next, head, entry) {
 452                list_del_init(&timer->entry);
 453                if (timer->expires.sched < sum_exec_runtime) {
 454                        timer->expires.sched = 0;
 455                } else {
 456                        timer->expires.sched -= sum_exec_runtime;
 457                }
 458        }
 459}
 460
 461/*
 462 * These are both called with the siglock held, when the current thread
 463 * is being reaped.  When the final (leader) thread in the group is reaped,
 464 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 465 */
 466void posix_cpu_timers_exit(struct task_struct *tsk)
 467{
 468        cleanup_timers(tsk->cpu_timers,
 469                       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
 470
 471}
 472void posix_cpu_timers_exit_group(struct task_struct *tsk)
 473{
 474        cleanup_timers(tsk->signal->cpu_timers,
 475                       cputime_add(tsk->utime, tsk->signal->utime),
 476                       cputime_add(tsk->stime, tsk->signal->stime),
 477                     tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
 478}
 479
 480
 481/*
 482 * Set the expiry times of all the threads in the process so one of them
 483 * will go off before the process cumulative expiry total is reached.
 484 */
 485static void process_timer_rebalance(struct task_struct *p,
 486                                    unsigned int clock_idx,
 487                                    union cpu_time_count expires,
 488                                    union cpu_time_count val)
 489{
 490        cputime_t ticks, left;
 491        unsigned long long ns, nsleft;
 492        struct task_struct *t = p;
 493        unsigned int nthreads = atomic_read(&p->signal->live);
 494
 495        if (!nthreads)
 496                return;
 497
 498        switch (clock_idx) {
 499        default:
 500                BUG();
 501                break;
 502        case CPUCLOCK_PROF:
 503                left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
 504                                       nthreads);
 505                do {
 506                        if (likely(!(t->flags & PF_EXITING))) {
 507                                ticks = cputime_add(prof_ticks(t), left);
 508                                if (cputime_eq(t->it_prof_expires,
 509                                               cputime_zero) ||
 510                                    cputime_gt(t->it_prof_expires, ticks)) {
 511                                        t->it_prof_expires = ticks;
 512                                }
 513                        }
 514                        t = next_thread(t);
 515                } while (t != p);
 516                break;
 517        case CPUCLOCK_VIRT:
 518                left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
 519                                       nthreads);
 520                do {
 521                        if (likely(!(t->flags & PF_EXITING))) {
 522                                ticks = cputime_add(virt_ticks(t), left);
 523                                if (cputime_eq(t->it_virt_expires,
 524                                               cputime_zero) ||
 525                                    cputime_gt(t->it_virt_expires, ticks)) {
 526                                        t->it_virt_expires = ticks;
 527                                }
 528                        }
 529                        t = next_thread(t);
 530                } while (t != p);
 531                break;
 532        case CPUCLOCK_SCHED:
 533                nsleft = expires.sched - val.sched;
 534                do_div(nsleft, nthreads);
 535                nsleft = max_t(unsigned long long, nsleft, 1);
 536                do {
 537                        if (likely(!(t->flags & PF_EXITING))) {
 538                                ns = t->se.sum_exec_runtime + nsleft;
 539                                if (t->it_sched_expires == 0 ||
 540                                    t->it_sched_expires > ns) {
 541                                        t->it_sched_expires = ns;
 542                                }
 543                        }
 544                        t = next_thread(t);
 545                } while (t != p);
 546                break;
 547        }
 548}
 549
 550static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 551{
 552        /*
 553         * That's all for this thread or process.
 554         * We leave our residual in expires to be reported.
 555         */
 556        put_task_struct(timer->it.cpu.task);
 557        timer->it.cpu.task = NULL;
 558        timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
 559                                             timer->it.cpu.expires,
 560                                             now);
 561}
 562
 563/*
 564 * Insert the timer on the appropriate list before any timers that
 565 * expire later.  This must be called with the tasklist_lock held
 566 * for reading, and interrupts disabled.
 567 */
 568static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 569{
 570        struct task_struct *p = timer->it.cpu.task;
 571        struct list_head *head, *listpos;
 572        struct cpu_timer_list *const nt = &timer->it.cpu;
 573        struct cpu_timer_list *next;
 574        unsigned long i;
 575
 576        head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 577                p->cpu_timers : p->signal->cpu_timers);
 578        head += CPUCLOCK_WHICH(timer->it_clock);
 579
 580        BUG_ON(!irqs_disabled());
 581        spin_lock(&p->sighand->siglock);
 582
 583        listpos = head;
 584        if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
 585                list_for_each_entry(next, head, entry) {
 586                        if (next->expires.sched > nt->expires.sched)
 587                                break;
 588                        listpos = &next->entry;
 589                }
 590        } else {
 591                list_for_each_entry(next, head, entry) {
 592                        if (cputime_gt(next->expires.cpu, nt->expires.cpu))
 593                                break;
 594                        listpos = &next->entry;
 595                }
 596        }
 597        list_add(&nt->entry, listpos);
 598
 599        if (listpos == head) {
 600                /*
 601                 * We are the new earliest-expiring timer.
 602                 * If we are a thread timer, there can always
 603                 * be a process timer telling us to stop earlier.
 604                 */
 605
 606                if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 607                        switch (CPUCLOCK_WHICH(timer->it_clock)) {
 608                        default:
 609                                BUG();
 610                        case CPUCLOCK_PROF:
 611                                if (cputime_eq(p->it_prof_expires,
 612                                               cputime_zero) ||
 613                                    cputime_gt(p->it_prof_expires,
 614                                               nt->expires.cpu))
 615                                        p->it_prof_expires = nt->expires.cpu;
 616                                break;
 617                        case CPUCLOCK_VIRT:
 618                                if (cputime_eq(p->it_virt_expires,
 619                                               cputime_zero) ||
 620                                    cputime_gt(p->it_virt_expires,
 621                                               nt->expires.cpu))
 622                                        p->it_virt_expires = nt->expires.cpu;
 623                                break;
 624                        case CPUCLOCK_SCHED:
 625                                if (p->it_sched_expires == 0 ||
 626                                    p->it_sched_expires > nt->expires.sched)
 627                                        p->it_sched_expires = nt->expires.sched;
 628                                break;
 629                        }
 630                } else {
 631                        /*
 632                         * For a process timer, we must balance
 633                         * all the live threads' expirations.
 634                         */
 635                        switch (CPUCLOCK_WHICH(timer->it_clock)) {
 636                        default:
 637                                BUG();
 638                        case CPUCLOCK_VIRT:
 639                                if (!cputime_eq(p->signal->it_virt_expires,
 640                                                cputime_zero) &&
 641                                    cputime_lt(p->signal->it_virt_expires,
 642                                               timer->it.cpu.expires.cpu))
 643                                        break;
 644                                goto rebalance;
 645                        case CPUCLOCK_PROF:
 646                                if (!cputime_eq(p->signal->it_prof_expires,
 647                                                cputime_zero) &&
 648                                    cputime_lt(p->signal->it_prof_expires,
 649                                               timer->it.cpu.expires.cpu))
 650                                        break;
 651                                i = p->signal->rlim[RLIMIT_CPU].rlim_cur;
 652                                if (i != RLIM_INFINITY &&
 653                                    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
 654                                        break;
 655                                goto rebalance;
 656                        case CPUCLOCK_SCHED:
 657                        rebalance:
 658                                process_timer_rebalance(
 659                                        timer->it.cpu.task,
 660                                        CPUCLOCK_WHICH(timer->it_clock),
 661                                        timer->it.cpu.expires, now);
 662                                break;
 663                        }
 664                }
 665        }
 666
 667        spin_unlock(&p->sighand->siglock);
 668}
 669
 670/*
 671 * The timer is locked, fire it and arrange for its reload.
 672 */
 673static void cpu_timer_fire(struct k_itimer *timer)
 674{
 675        if (unlikely(timer->sigq == NULL)) {
 676                /*
 677                 * This a special case for clock_nanosleep,
 678                 * not a normal timer from sys_timer_create.
 679                 */
 680                wake_up_process(timer->it_process);
 681                timer->it.cpu.expires.sched = 0;
 682        } else if (timer->it.cpu.incr.sched == 0) {
 683                /*
 684                 * One-shot timer.  Clear it as soon as it's fired.
 685                 */
 686                posix_timer_event(timer, 0);
 687                timer->it.cpu.expires.sched = 0;
 688        } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
 689                /*
 690                 * The signal did not get queued because the signal
 691                 * was ignored, so we won't get any callback to
 692                 * reload the timer.  But we need to keep it
 693                 * ticking in case the signal is deliverable next time.
 694                 */
 695                posix_cpu_timer_schedule(timer);
 696        }
 697}
 698
 699/*
 700 * Guts of sys_timer_settime for CPU timers.
 701 * This is called with the timer locked and interrupts disabled.
 702 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 703 * and try again.  (This happens when the timer is in the middle of firing.)
 704 */
 705int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 706                        struct itimerspec *new, struct itimerspec *old)
 707{
 708        struct task_struct *p = timer->it.cpu.task;
 709        union cpu_time_count old_expires, new_expires, val;
 710        int ret;
 711
 712        if (unlikely(p == NULL)) {
 713                /*
 714                 * Timer refers to a dead task's clock.
 715                 */
 716                return -ESRCH;
 717        }
 718
 719        new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 720
 721        read_lock(&tasklist_lock);
 722        /*
 723         * We need the tasklist_lock to protect against reaping that
 724         * clears p->signal.  If p has just been reaped, we can no
 725         * longer get any information about it at all.
 726         */
 727        if (unlikely(p->signal == NULL)) {
 728                read_unlock(&tasklist_lock);
 729                put_task_struct(p);
 730                timer->it.cpu.task = NULL;
 731                return -ESRCH;
 732        }
 733
 734        /*
 735         * Disarm any old timer after extracting its expiry time.
 736         */
 737        BUG_ON(!irqs_disabled());
 738
 739        ret = 0;
 740        spin_lock(&p->sighand->siglock);
 741        old_expires = timer->it.cpu.expires;
 742        if (unlikely(timer->it.cpu.firing)) {
 743                timer->it.cpu.firing = -1;
 744                ret = TIMER_RETRY;
 745        } else
 746                list_del_init(&timer->it.cpu.entry);
 747        spin_unlock(&p->sighand->siglock);
 748
 749        /*
 750         * We need to sample the current value to convert the new
 751         * value from to relative and absolute, and to convert the
 752         * old value from absolute to relative.  To set a process
 753         * timer, we need a sample to balance the thread expiry
 754         * times (in arm_timer).  With an absolute time, we must
 755         * check if it's already passed.  In short, we need a sample.
 756         */
 757        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 758                cpu_clock_sample(timer->it_clock, p, &val);
 759        } else {
 760                cpu_clock_sample_group(timer->it_clock, p, &val);
 761        }
 762
 763        if (old) {
 764                if (old_expires.sched == 0) {
 765                        old->it_value.tv_sec = 0;
 766                        old->it_value.tv_nsec = 0;
 767                } else {
 768                        /*
 769                         * Update the timer in case it has
 770                         * overrun already.  If it has,
 771                         * we'll report it as having overrun
 772                         * and with the next reloaded timer
 773                         * already ticking, though we are
 774                         * swallowing that pending
 775                         * notification here to install the
 776                         * new setting.
 777                         */
 778                        bump_cpu_timer(timer, val);
 779                        if (cpu_time_before(timer->it_clock, val,
 780                                            timer->it.cpu.expires)) {
 781                                old_expires = cpu_time_sub(
 782                                        timer->it_clock,
 783                                        timer->it.cpu.expires, val);
 784                                sample_to_timespec(timer->it_clock,
 785                                                   old_expires,
 786                                                   &old->it_value);
 787                        } else {
 788                                old->it_value.tv_nsec = 1;
 789                                old->it_value.tv_sec = 0;
 790                        }
 791                }
 792        }
 793
 794        if (unlikely(ret)) {
 795                /*
 796                 * We are colliding with the timer actually firing.
 797                 * Punt after filling in the timer's old value, and
 798                 * disable this firing since we are already reporting
 799                 * it as an overrun (thanks to bump_cpu_timer above).
 800                 */
 801                read_unlock(&tasklist_lock);
 802                goto out;
 803        }
 804
 805        if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
 806                cpu_time_add(timer->it_clock, &new_expires, val);
 807        }
 808
 809        /*
 810         * Install the new expiry time (or zero).
 811         * For a timer with no notification action, we don't actually
 812         * arm the timer (we'll just fake it for timer_gettime).
 813         */
 814        timer->it.cpu.expires = new_expires;
 815        if (new_expires.sched != 0 &&
 816            (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
 817            cpu_time_before(timer->it_clock, val, new_expires)) {
 818                arm_timer(timer, val);
 819        }
 820
 821        read_unlock(&tasklist_lock);
 822
 823        /*
 824         * Install the new reload setting, and
 825         * set up the signal and overrun bookkeeping.
 826         */
 827        timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
 828                                                &new->it_interval);
 829
 830        /*
 831         * This acts as a modification timestamp for the timer,
 832         * so any automatic reload attempt will punt on seeing
 833         * that we have reset the timer manually.
 834         */
 835        timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
 836                ~REQUEUE_PENDING;
 837        timer->it_overrun_last = 0;
 838        timer->it_overrun = -1;
 839
 840        if (new_expires.sched != 0 &&
 841            (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
 842            !cpu_time_before(timer->it_clock, val, new_expires)) {
 843                /*
 844                 * The designated time already passed, so we notify
 845                 * immediately, even if the thread never runs to
 846                 * accumulate more time on this clock.
 847                 */
 848                cpu_timer_fire(timer);
 849        }
 850
 851        ret = 0;
 852 out:
 853        if (old) {
 854                sample_to_timespec(timer->it_clock,
 855                                   timer->it.cpu.incr, &old->it_interval);
 856        }
 857        return ret;
 858}
 859
 860void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 861{
 862        union cpu_time_count now;
 863        struct task_struct *p = timer->it.cpu.task;
 864        int clear_dead;
 865
 866        /*
 867         * Easy part: convert the reload time.
 868         */
 869        sample_to_timespec(timer->it_clock,
 870                           timer->it.cpu.incr, &itp->it_interval);
 871
 872        if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all.  */
 873                itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 874                return;
 875        }
 876
 877        if (unlikely(p == NULL)) {
 878                /*
 879                 * This task already died and the timer will never fire.
 880                 * In this case, expires is actually the dead value.
 881                 */
 882        dead:
 883                sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
 884                                   &itp->it_value);
 885                return;
 886        }
 887
 888        /*
 889         * Sample the clock to take the difference with the expiry time.
 890         */
 891        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 892                cpu_clock_sample(timer->it_clock, p, &now);
 893                clear_dead = p->exit_state;
 894        } else {
 895                read_lock(&tasklist_lock);
 896                if (unlikely(p->signal == NULL)) {
 897                        /*
 898                         * The process has been reaped.
 899                         * We can't even collect a sample any more.
 900                         * Call the timer disarmed, nothing else to do.
 901                         */
 902                        put_task_struct(p);
 903                        timer->it.cpu.task = NULL;
 904                        timer->it.cpu.expires.sched = 0;
 905                        read_unlock(&tasklist_lock);
 906                        goto dead;
 907                } else {
 908                        cpu_clock_sample_group(timer->it_clock, p, &now);
 909                        clear_dead = (unlikely(p->exit_state) &&
 910                                      thread_group_empty(p));
 911                }
 912                read_unlock(&tasklist_lock);
 913        }
 914
 915        if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 916                if (timer->it.cpu.incr.sched == 0 &&
 917                    cpu_time_before(timer->it_clock,
 918                                    timer->it.cpu.expires, now)) {
 919                        /*
 920                         * Do-nothing timer expired and has no reload,
 921                         * so it's as if it was never set.
 922                         */
 923                        timer->it.cpu.expires.sched = 0;
 924                        itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 925                        return;
 926                }
 927                /*
 928                 * Account for any expirations and reloads that should
 929                 * have happened.
 930                 */
 931                bump_cpu_timer(timer, now);
 932        }
 933
 934        if (unlikely(clear_dead)) {
 935                /*
 936                 * We've noticed that the thread is dead, but
 937                 * not yet reaped.  Take this opportunity to
 938                 * drop our task ref.
 939                 */
 940                clear_dead_task(timer, now);
 941                goto dead;
 942        }
 943
 944        if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
 945                sample_to_timespec(timer->it_clock,
 946                                   cpu_time_sub(timer->it_clock,
 947                                                timer->it.cpu.expires, now),
 948                                   &itp->it_value);
 949        } else {
 950                /*
 951                 * The timer should have expired already, but the firing
 952                 * hasn't taken place yet.  Say it's just about to expire.
 953                 */
 954                itp->it_value.tv_nsec = 1;
 955                itp->it_value.tv_sec = 0;
 956        }
 957}
 958
 959/*
 960 * Check for any per-thread CPU timers that have fired and move them off
 961 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
 962 * tsk->it_*_expires values to reflect the remaining thread CPU timers.
 963 */
 964static void check_thread_timers(struct task_struct *tsk,
 965                                struct list_head *firing)
 966{
 967        int maxfire;
 968        struct list_head *timers = tsk->cpu_timers;
 969        struct signal_struct *const sig = tsk->signal;
 970
 971        maxfire = 20;
 972        tsk->it_prof_expires = cputime_zero;
 973        while (!list_empty(timers)) {
 974                struct cpu_timer_list *t = list_first_entry(timers,
 975                                                      struct cpu_timer_list,
 976                                                      entry);
 977                if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
 978                        tsk->it_prof_expires = t->expires.cpu;
 979                        break;
 980                }
 981                t->firing = 1;
 982                list_move_tail(&t->entry, firing);
 983        }
 984
 985        ++timers;
 986        maxfire = 20;
 987        tsk->it_virt_expires = cputime_zero;
 988        while (!list_empty(timers)) {
 989                struct cpu_timer_list *t = list_first_entry(timers,
 990                                                      struct cpu_timer_list,
 991                                                      entry);
 992                if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
 993                        tsk->it_virt_expires = t->expires.cpu;
 994                        break;
 995                }
 996                t->firing = 1;
 997                list_move_tail(&t->entry, firing);
 998        }
 999
1000        ++timers;
1001        maxfire = 20;
1002        tsk->it_sched_expires = 0;
1003        while (!list_empty(timers)) {
1004                struct cpu_timer_list *t = list_first_entry(timers,
1005                                                      struct cpu_timer_list,
1006                                                      entry);
1007                if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
1008                        tsk->it_sched_expires = t->expires.sched;
1009                        break;
1010                }
1011                t->firing = 1;
1012                list_move_tail(&t->entry, firing);
1013        }
1014
1015        /*
1016         * Check for the special case thread timers.
1017         */
1018        if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
1019                unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
1020                unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
1021
1022                if (hard != RLIM_INFINITY &&
1023                    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
1024                        /*
1025                         * At the hard limit, we just die.
1026                         * No need to calculate anything else now.
1027                         */
1028                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1029                        return;
1030                }
1031                if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
1032                        /*
1033                         * At the soft limit, send a SIGXCPU every second.
1034                         */
1035                        if (sig->rlim[RLIMIT_RTTIME].rlim_cur
1036                            < sig->rlim[RLIMIT_RTTIME].rlim_max) {
1037                                sig->rlim[RLIMIT_RTTIME].rlim_cur +=
1038                                                                USEC_PER_SEC;
1039                        }
1040                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1041                }
1042        }
1043}
1044
1045/*
1046 * Check for any per-thread CPU timers that have fired and move them
1047 * off the tsk->*_timers list onto the firing list.  Per-thread timers
1048 * have already been taken off.
1049 */
1050static void check_process_timers(struct task_struct *tsk,
1051                                 struct list_head *firing)
1052{
1053        int maxfire;
1054        struct signal_struct *const sig = tsk->signal;
1055        cputime_t utime, stime, ptime, virt_expires, prof_expires;
1056        unsigned long long sum_sched_runtime, sched_expires;
1057        struct task_struct *t;
1058        struct list_head *timers = sig->cpu_timers;
1059
1060        /*
1061         * Don't sample the current process CPU clocks if there are no timers.
1062         */
1063        if (list_empty(&timers[CPUCLOCK_PROF]) &&
1064            cputime_eq(sig->it_prof_expires, cputime_zero) &&
1065            sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1066            list_empty(&timers[CPUCLOCK_VIRT]) &&
1067            cputime_eq(sig->it_virt_expires, cputime_zero) &&
1068            list_empty(&timers[CPUCLOCK_SCHED]))
1069                return;
1070
1071        /*
1072         * Collect the current process totals.
1073         */
1074        utime = sig->utime;
1075        stime = sig->stime;
1076        sum_sched_runtime = sig->sum_sched_runtime;
1077        t = tsk;
1078        do {
1079                utime = cputime_add(utime, t->utime);
1080                stime = cputime_add(stime, t->stime);
1081                sum_sched_runtime += t->se.sum_exec_runtime;
1082                t = next_thread(t);
1083        } while (t != tsk);
1084        ptime = cputime_add(utime, stime);
1085
1086        maxfire = 20;
1087        prof_expires = cputime_zero;
1088        while (!list_empty(timers)) {
1089                struct cpu_timer_list *tl = list_first_entry(timers,
1090                                                      struct cpu_timer_list,
1091                                                      entry);
1092                if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
1093                        prof_expires = tl->expires.cpu;
1094                        break;
1095                }
1096                tl->firing = 1;
1097                list_move_tail(&tl->entry, firing);
1098        }
1099
1100        ++timers;
1101        maxfire = 20;
1102        virt_expires = cputime_zero;
1103        while (!list_empty(timers)) {
1104                struct cpu_timer_list *tl = list_first_entry(timers,
1105                                                      struct cpu_timer_list,
1106                                                      entry);
1107                if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
1108                        virt_expires = tl->expires.cpu;
1109                        break;
1110                }
1111                tl->firing = 1;
1112                list_move_tail(&tl->entry, firing);
1113        }
1114
1115        ++timers;
1116        maxfire = 20;
1117        sched_expires = 0;
1118        while (!list_empty(timers)) {
1119                struct cpu_timer_list *tl = list_first_entry(timers,
1120                                                      struct cpu_timer_list,
1121                                                      entry);
1122                if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
1123                        sched_expires = tl->expires.sched;
1124                        break;
1125                }
1126                tl->firing = 1;
1127                list_move_tail(&tl->entry, firing);
1128        }
1129
1130        /*
1131         * Check for the special case process timers.
1132         */
1133        if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1134                if (cputime_ge(ptime, sig->it_prof_expires)) {
1135                        /* ITIMER_PROF fires and reloads.  */
1136                        sig->it_prof_expires = sig->it_prof_incr;
1137                        if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1138                                sig->it_prof_expires = cputime_add(
1139                                        sig->it_prof_expires, ptime);
1140                        }
1141                        __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
1142                }
1143                if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
1144                    (cputime_eq(prof_expires, cputime_zero) ||
1145                     cputime_lt(sig->it_prof_expires, prof_expires))) {
1146                        prof_expires = sig->it_prof_expires;
1147                }
1148        }
1149        if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1150                if (cputime_ge(utime, sig->it_virt_expires)) {
1151                        /* ITIMER_VIRTUAL fires and reloads.  */
1152                        sig->it_virt_expires = sig->it_virt_incr;
1153                        if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1154                                sig->it_virt_expires = cputime_add(
1155                                        sig->it_virt_expires, utime);
1156                        }
1157                        __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
1158                }
1159                if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
1160                    (cputime_eq(virt_expires, cputime_zero) ||
1161                     cputime_lt(sig->it_virt_expires, virt_expires))) {
1162                        virt_expires = sig->it_virt_expires;
1163                }
1164        }
1165        if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
1166                unsigned long psecs = cputime_to_secs(ptime);
1167                cputime_t x;
1168                if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
1169                        /*
1170                         * At the hard limit, we just die.
1171                         * No need to calculate anything else now.
1172                         */
1173                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1174                        return;
1175                }
1176                if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
1177                        /*
1178                         * At the soft limit, send a SIGXCPU every second.
1179                         */
1180                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1181                        if (sig->rlim[RLIMIT_CPU].rlim_cur
1182                            < sig->rlim[RLIMIT_CPU].rlim_max) {
1183                                sig->rlim[RLIMIT_CPU].rlim_cur++;
1184                        }
1185                }
1186                x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
1187                if (cputime_eq(prof_expires, cputime_zero) ||
1188                    cputime_lt(x, prof_expires)) {
1189                        prof_expires = x;
1190                }
1191        }
1192
1193        if (!cputime_eq(prof_expires, cputime_zero) ||
1194            !cputime_eq(virt_expires, cputime_zero) ||
1195            sched_expires != 0) {
1196                /*
1197                 * Rebalance the threads' expiry times for the remaining
1198                 * process CPU timers.
1199                 */
1200
1201                cputime_t prof_left, virt_left, ticks;
1202                unsigned long long sched_left, sched;
1203                const unsigned int nthreads = atomic_read(&sig->live);
1204
1205                if (!nthreads)
1206                        return;
1207
1208                prof_left = cputime_sub(prof_expires, utime);
1209                prof_left = cputime_sub(prof_left, stime);
1210                prof_left = cputime_div_non_zero(prof_left, nthreads);
1211                virt_left = cputime_sub(virt_expires, utime);
1212                virt_left = cputime_div_non_zero(virt_left, nthreads);
1213                if (sched_expires) {
1214                        sched_left = sched_expires - sum_sched_runtime;
1215                        do_div(sched_left, nthreads);
1216                        sched_left = max_t(unsigned long long, sched_left, 1);
1217                } else {
1218                        sched_left = 0;
1219                }
1220                t = tsk;
1221                do {
1222                        if (unlikely(t->flags & PF_EXITING))
1223                                continue;
1224
1225                        ticks = cputime_add(cputime_add(t->utime, t->stime),
1226                                            prof_left);
1227                        if (!cputime_eq(prof_expires, cputime_zero) &&
1228                            (cputime_eq(t->it_prof_expires, cputime_zero) ||
1229                             cputime_gt(t->it_prof_expires, ticks))) {
1230                                t->it_prof_expires = ticks;
1231                        }
1232
1233                        ticks = cputime_add(t->utime, virt_left);
1234                        if (!cputime_eq(virt_expires, cputime_zero) &&
1235                            (cputime_eq(t->it_virt_expires, cputime_zero) ||
1236                             cputime_gt(t->it_virt_expires, ticks))) {
1237                                t->it_virt_expires = ticks;
1238                        }
1239
1240                        sched = t->se.sum_exec_runtime + sched_left;
1241                        if (sched_expires && (t->it_sched_expires == 0 ||
1242                                              t->it_sched_expires > sched)) {
1243                                t->it_sched_expires = sched;
1244                        }
1245                } while ((t = next_thread(t)) != tsk);
1246        }
1247}
1248
1249/*
1250 * This is called from the signal code (via do_schedule_next_timer)
1251 * when the last timer signal was delivered and we have to reload the timer.
1252 */
1253void posix_cpu_timer_schedule(struct k_itimer *timer)
1254{
1255        struct task_struct *p = timer->it.cpu.task;
1256        union cpu_time_count now;
1257
1258        if (unlikely(p == NULL))
1259                /*
1260                 * The task was cleaned up already, no future firings.
1261                 */
1262                goto out;
1263
1264        /*
1265         * Fetch the current sample and update the timer's expiry time.
1266         */
1267        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1268                cpu_clock_sample(timer->it_clock, p, &now);
1269                bump_cpu_timer(timer, now);
1270                if (unlikely(p->exit_state)) {
1271                        clear_dead_task(timer, now);
1272                        goto out;
1273                }
1274                read_lock(&tasklist_lock); /* arm_timer needs it.  */
1275        } else {
1276                read_lock(&tasklist_lock);
1277                if (unlikely(p->signal == NULL)) {
1278                        /*
1279                         * The process has been reaped.
1280                         * We can't even collect a sample any more.
1281                         */
1282                        put_task_struct(p);
1283                        timer->it.cpu.task = p = NULL;
1284                        timer->it.cpu.expires.sched = 0;
1285                        goto out_unlock;
1286                } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1287                        /*
1288                         * We've noticed that the thread is dead, but
1289                         * not yet reaped.  Take this opportunity to
1290                         * drop our task ref.
1291                         */
1292                        clear_dead_task(timer, now);
1293                        goto out_unlock;
1294                }
1295                cpu_clock_sample_group(timer->it_clock, p, &now);
1296                bump_cpu_timer(timer, now);
1297                /* Leave the tasklist_lock locked for the call below.  */
1298        }
1299
1300        /*
1301         * Now re-arm for the new expiry time.
1302         */
1303        arm_timer(timer, now);
1304
1305out_unlock:
1306        read_unlock(&tasklist_lock);
1307
1308out:
1309        timer->it_overrun_last = timer->it_overrun;
1310        timer->it_overrun = -1;
1311        ++timer->it_requeue_pending;
1312}
1313
1314/*
1315 * This is called from the timer interrupt handler.  The irq handler has
1316 * already updated our counts.  We need to check if any timers fire now.
1317 * Interrupts are disabled.
1318 */
1319void run_posix_cpu_timers(struct task_struct *tsk)
1320{
1321        LIST_HEAD(firing);
1322        struct k_itimer *timer, *next;
1323
1324        BUG_ON(!irqs_disabled());
1325
1326#define UNEXPIRED(clock) \
1327                (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
1328                 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
1329
1330        if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
1331            (tsk->it_sched_expires == 0 ||
1332             tsk->se.sum_exec_runtime < tsk->it_sched_expires))
1333                return;
1334
1335#undef  UNEXPIRED
1336
1337        /*
1338         * Double-check with locks held.
1339         */
1340        read_lock(&tasklist_lock);
1341        if (likely(tsk->signal != NULL)) {
1342                spin_lock(&tsk->sighand->siglock);
1343
1344                /*
1345                 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
1346                 * all the timers that are firing, and put them on the firing list.
1347                 */
1348                check_thread_timers(tsk, &firing);
1349                check_process_timers(tsk, &firing);
1350
1351                /*
1352                 * We must release these locks before taking any timer's lock.
1353                 * There is a potential race with timer deletion here, as the
1354                 * siglock now protects our private firing list.  We have set
1355                 * the firing flag in each timer, so that a deletion attempt
1356                 * that gets the timer lock before we do will give it up and
1357                 * spin until we've taken care of that timer below.
1358                 */
1359                spin_unlock(&tsk->sighand->siglock);
1360        }
1361        read_unlock(&tasklist_lock);
1362
1363        /*
1364         * Now that all the timers on our list have the firing flag,
1365         * noone will touch their list entries but us.  We'll take
1366         * each timer's lock before clearing its firing flag, so no
1367         * timer call will interfere.
1368         */
1369        list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1370                int firing;
1371                spin_lock(&timer->it_lock);
1372                list_del_init(&timer->it.cpu.entry);
1373                firing = timer->it.cpu.firing;
1374                timer->it.cpu.firing = 0;
1375                /*
1376                 * The firing flag is -1 if we collided with a reset
1377                 * of the timer, which already reported this
1378                 * almost-firing as an overrun.  So don't generate an event.
1379                 */
1380                if (likely(firing >= 0)) {
1381                        cpu_timer_fire(timer);
1382                }
1383                spin_unlock(&timer->it_lock);
1384        }
1385}
1386
1387/*
1388 * Set one of the process-wide special case CPU timers.
1389 * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
1390 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
1391 * absolute; non-null for ITIMER_*, where *newval is relative and we update
1392 * it to be absolute, *oldval is absolute and we update it to be relative.
1393 */
1394void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1395                           cputime_t *newval, cputime_t *oldval)
1396{
1397        union cpu_time_count now;
1398        struct list_head *head;
1399
1400        BUG_ON(clock_idx == CPUCLOCK_SCHED);
1401        cpu_clock_sample_group_locked(clock_idx, tsk, &now);
1402
1403        if (oldval) {
1404                if (!cputime_eq(*oldval, cputime_zero)) {
1405                        if (cputime_le(*oldval, now.cpu)) {
1406                                /* Just about to fire. */
1407                                *oldval = jiffies_to_cputime(1);
1408                        } else {
1409                                *oldval = cputime_sub(*oldval, now.cpu);
1410                        }
1411                }
1412
1413                if (cputime_eq(*newval, cputime_zero))
1414                        return;
1415                *newval = cputime_add(*newval, now.cpu);
1416
1417                /*
1418                 * If the RLIMIT_CPU timer will expire before the
1419                 * ITIMER_PROF timer, we have nothing else to do.
1420                 */
1421                if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1422                    < cputime_to_secs(*newval))
1423                        return;
1424        }
1425
1426        /*
1427         * Check whether there are any process timers already set to fire
1428         * before this one.  If so, we don't have anything more to do.
1429         */
1430        head = &tsk->signal->cpu_timers[clock_idx];
1431        if (list_empty(head) ||
1432            cputime_ge(list_first_entry(head,
1433                                  struct cpu_timer_list, entry)->expires.cpu,
1434                       *newval)) {
1435                /*
1436                 * Rejigger each thread's expiry time so that one will
1437                 * notice before we hit the process-cumulative expiry time.
1438                 */
1439                union cpu_time_count expires = { .sched = 0 };
1440                expires.cpu = *newval;
1441                process_timer_rebalance(tsk, clock_idx, expires, now);
1442        }
1443}
1444
1445static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1446                            struct timespec *rqtp, struct itimerspec *it)
1447{
1448        struct k_itimer timer;
1449        int error;
1450
1451        /*
1452         * Set up a temporary timer and then wait for it to go off.
1453         */
1454        memset(&timer, 0, sizeof timer);
1455        spin_lock_init(&timer.it_lock);
1456        timer.it_clock = which_clock;
1457        timer.it_overrun = -1;
1458        error = posix_cpu_timer_create(&timer);
1459        timer.it_process = current;
1460        if (!error) {
1461                static struct itimerspec zero_it;
1462
1463                memset(it, 0, sizeof *it);
1464                it->it_value = *rqtp;
1465
1466                spin_lock_irq(&timer.it_lock);
1467                error = posix_cpu_timer_set(&timer, flags, it, NULL);
1468                if (error) {
1469                        spin_unlock_irq(&timer.it_lock);
1470                        return error;
1471                }
1472
1473                while (!signal_pending(current)) {
1474                        if (timer.it.cpu.expires.sched == 0) {
1475                                /*
1476                                 * Our timer fired and was reset.
1477                                 */
1478                                spin_unlock_irq(&timer.it_lock);
1479                                return 0;
1480                        }
1481
1482                        /*
1483                         * Block until cpu_timer_fire (or a signal) wakes us.
1484                         */
1485                        __set_current_state(TASK_INTERRUPTIBLE);
1486                        spin_unlock_irq(&timer.it_lock);
1487                        schedule();
1488                        spin_lock_irq(&timer.it_lock);
1489                }
1490
1491                /*
1492                 * We were interrupted by a signal.
1493                 */
1494                sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1495                posix_cpu_timer_set(&timer, 0, &zero_it, it);
1496                spin_unlock_irq(&timer.it_lock);
1497
1498                if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1499                        /*
1500                         * It actually did fire already.
1501                         */
1502                        return 0;
1503                }
1504
1505                error = -ERESTART_RESTARTBLOCK;
1506        }
1507
1508        return error;
1509}
1510
1511int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1512                     struct timespec *rqtp, struct timespec __user *rmtp)
1513{
1514        struct restart_block *restart_block =
1515            &current_thread_info()->restart_block;
1516        struct itimerspec it;
1517        int error;
1518
1519        /*
1520         * Diagnose required errors first.
1521         */
1522        if (CPUCLOCK_PERTHREAD(which_clock) &&
1523            (CPUCLOCK_PID(which_clock) == 0 ||
1524             CPUCLOCK_PID(which_clock) == current->pid))
1525                return -EINVAL;
1526
1527        error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1528
1529        if (error == -ERESTART_RESTARTBLOCK) {
1530
1531                if (flags & TIMER_ABSTIME)
1532                        return -ERESTARTNOHAND;
1533                /*
1534                 * Report back to the user the time still remaining.
1535                 */
1536                if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1537                        return -EFAULT;
1538
1539                restart_block->fn = posix_cpu_nsleep_restart;
1540                restart_block->arg0 = which_clock;
1541                restart_block->arg1 = (unsigned long) rmtp;
1542                restart_block->arg2 = rqtp->tv_sec;
1543                restart_block->arg3 = rqtp->tv_nsec;
1544        }
1545        return error;
1546}
1547
1548long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1549{
1550        clockid_t which_clock = restart_block->arg0;
1551        struct timespec __user *rmtp;
1552        struct timespec t;
1553        struct itimerspec it;
1554        int error;
1555
1556        rmtp = (struct timespec __user *) restart_block->arg1;
1557        t.tv_sec = restart_block->arg2;
1558        t.tv_nsec = restart_block->arg3;
1559
1560        restart_block->fn = do_no_restart_syscall;
1561        error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1562
1563        if (error == -ERESTART_RESTARTBLOCK) {
1564                /*
1565                 * Report back to the user the time still remaining.
1566                 */
1567                if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1568                        return -EFAULT;
1569
1570                restart_block->fn = posix_cpu_nsleep_restart;
1571                restart_block->arg0 = which_clock;
1572                restart_block->arg1 = (unsigned long) rmtp;
1573                restart_block->arg2 = t.tv_sec;
1574                restart_block->arg3 = t.tv_nsec;
1575        }
1576        return error;
1577
1578}
1579
1580
/*
 * Clock ids substituted by the process_cpu_* and thread_cpu_* wrappers:
 * a process-wide and a per-thread CPUCLOCK_SCHED clock, both built with a
 * first argument of 0 (presumably the pid, i.e. "the caller" -- confirm
 * against the MAKE_*_CPUCLOCK definitions).
 */
#define PROCESS_CLOCK  MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK   MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1583
1584static int process_cpu_clock_getres(const clockid_t which_clock,
1585                                    struct timespec *tp)
1586{
1587        return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1588}
1589static int process_cpu_clock_get(const clockid_t which_clock,
1590                                 struct timespec *tp)
1591{
1592        return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1593}
1594static int process_cpu_timer_create(struct k_itimer *timer)
1595{
1596        timer->it_clock = PROCESS_CLOCK;
1597        return posix_cpu_timer_create(timer);
1598}
1599static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1600                              struct timespec *rqtp,
1601                              struct timespec __user *rmtp)
1602{
1603        return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1604}
/*
 * nsleep_restart hook of the process CPU-time clock: always fails with
 * -EINVAL.  Interrupted sleeps are resumed through the restart function
 * that posix_cpu_nsleep() installs (posix_cpu_nsleep_restart) instead.
 */
static long
process_cpu_nsleep_restart(struct restart_block *restart_block)
{
        return -EINVAL;
}
1609static int thread_cpu_clock_getres(const clockid_t which_clock,
1610                                   struct timespec *tp)
1611{
1612        return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1613}
1614static int thread_cpu_clock_get(const clockid_t which_clock,
1615                                struct timespec *tp)
1616{
1617        return posix_cpu_clock_get(THREAD_CLOCK, tp);
1618}
1619static int thread_cpu_timer_create(struct k_itimer *timer)
1620{
1621        timer->it_clock = THREAD_CLOCK;
1622        return posix_cpu_timer_create(timer);
1623}
1624static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
1625                              struct timespec *rqtp, struct timespec __user *rmtp)
1626{
1627        return -EINVAL;
1628}
/*
 * nsleep_restart hook of the thread CPU-time clock: always fails with
 * -EINVAL, matching thread_cpu_nsleep(), which never starts a sleep.
 */
static long
thread_cpu_nsleep_restart(struct restart_block *restart_block)
{
        return -EINVAL;
}
1633
1634static __init int init_posix_cpu_timers(void)
1635{
1636        struct k_clock process = {
1637                .clock_getres = process_cpu_clock_getres,
1638                .clock_get = process_cpu_clock_get,
1639                .clock_set = do_posix_clock_nosettime,
1640                .timer_create = process_cpu_timer_create,
1641                .nsleep = process_cpu_nsleep,
1642                .nsleep_restart = process_cpu_nsleep_restart,
1643        };
1644        struct k_clock thread = {
1645                .clock_getres = thread_cpu_clock_getres,
1646                .clock_get = thread_cpu_clock_get,
1647                .clock_set = do_posix_clock_nosettime,
1648                .timer_create = thread_cpu_timer_create,
1649                .nsleep = thread_cpu_nsleep,
1650                .nsleep_restart = thread_cpu_nsleep_restart,
1651        };
1652
1653        register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1654        register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1655
1656        return 0;
1657}
1658__initcall(init_posix_cpu_timers);
1659
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.