linux/kernel/posix-cpu-timers.c
   1/*
   2 * Implement CPU time clocks for the POSIX clock interface.
   3 */
   4
   5#include <linux/sched.h>
   6#include <linux/posix-timers.h>
   7#include <linux/errno.h>
   8#include <linux/math64.h>
   9#include <asm/uaccess.h>
  10#include <linux/kernel_stat.h>
  11#include <trace/events/timer.h>
  12#include <linux/random.h>
  13
  14/*
  15 * Called after updating RLIMIT_CPU to run the cpu timer and update the
  16 * tsk->signal->cputime_expires expiration cache if necessary.  Needs
  17 * siglock protection since other code may update the expiration cache
  18 * as well.
  19 */
  20void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
  21{
  22        cputime_t cputime = secs_to_cputime(rlim_new);
  23
  24        spin_lock_irq(&task->sighand->siglock);
  25        set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
  26        spin_unlock_irq(&task->sighand->siglock);
  27}
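/*
 * Illustrative sketch only (standard user-space API, not part of this file):
 * the call that normally ends up in update_rlimit_cpu() above is setrlimit()
 * changing RLIMIT_CPU; the exact in-kernel entry path (do_prlimit) is assumed
 * here.
 *
 *	#include <sys/resource.h>
 *
 *	struct rlimit rl = {
 *		.rlim_cur = 10,		/- soft limit: 10s of CPU time -/
 *		.rlim_max = 20,		/- hard limit: 20s of CPU time -/
 *	};
 *	if (setrlimit(RLIMIT_CPU, &rl) != 0)
 *		perror("setrlimit");
 *
 * (The /- -/ markers stand in for nested comments in this sketch.)
 */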
  28
  29static int check_clock(const clockid_t which_clock)
  30{
  31        int error = 0;
  32        struct task_struct *p;
  33        const pid_t pid = CPUCLOCK_PID(which_clock);
  34
  35        if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
  36                return -EINVAL;
  37
  38        if (pid == 0)
  39                return 0;
  40
  41        rcu_read_lock();
  42        p = find_task_by_vpid(pid);
  43        if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
  44                   same_thread_group(p, current) : has_group_leader_pid(p))) {
  45                error = -EINVAL;
  46        }
  47        rcu_read_unlock();
  48
  49        return error;
  50}
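/*
 * For reference (an illustrative summary, assuming the CPUCLOCK_* and
 * MAKE_*_CPUCLOCK helpers in <linux/posix-timers.h> of this era): a CPU
 * clockid packs three things -- bits 0-1 select the clock type
 * (CPUCLOCK_PROF, CPUCLOCK_VIRT or CPUCLOCK_SCHED), bit 2
 * (CPUCLOCK_PERTHREAD) says whether it names a single thread or a whole
 * thread group, and the remaining high bits carry the bitwise-NOT of the
 * PID, so that pid 0 denotes the caller's own clock, as check_clock()
 * relies on above.  Roughly:
 *
 *	clockid = (~(clockid_t) pid << 3) | perthread_bit | which;
 */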
  51
  52static inline union cpu_time_count
  53timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
  54{
  55        union cpu_time_count ret;
  56        ret.sched = 0;          /* high half always zero when .cpu used */
  57        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  58                ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
  59        } else {
  60                ret.cpu = timespec_to_cputime(tp);
  61        }
  62        return ret;
  63}
  64
  65static void sample_to_timespec(const clockid_t which_clock,
  66                               union cpu_time_count cpu,
  67                               struct timespec *tp)
  68{
  69        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
  70                *tp = ns_to_timespec(cpu.sched);
  71        else
  72                cputime_to_timespec(cpu.cpu, tp);
  73}
  74
  75static inline int cpu_time_before(const clockid_t which_clock,
  76                                  union cpu_time_count now,
  77                                  union cpu_time_count then)
  78{
  79        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  80                return now.sched < then.sched;
  81        }  else {
  82                return now.cpu < then.cpu;
  83        }
  84}
  85static inline void cpu_time_add(const clockid_t which_clock,
  86                                union cpu_time_count *acc,
  87                                union cpu_time_count val)
  88{
  89        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  90                acc->sched += val.sched;
  91        }  else {
  92                acc->cpu += val.cpu;
  93        }
  94}
  95static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
  96                                                union cpu_time_count a,
  97                                                union cpu_time_count b)
  98{
  99        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 100                a.sched -= b.sched;
 101        }  else {
 102                a.cpu -= b.cpu;
 103        }
 104        return a;
 105}
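/*
 * Note on the union used by the helpers above: CPUCLOCK_SCHED samples live
 * in .sched as plain nanoseconds, while PROF and VIRT samples use .cpu as a
 * cputime_t.  Because .sched spans the whole union (see the "high half
 * always zero" comment in timespec_to_sample()), expires.sched == 0 doubles
 * as the "timer not armed" test used throughout this file.
 */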
 106
 107/*
 108 * Update expiry time from increment, and increase overrun count,
 109 * given the current clock sample.
 110 */
 111static void bump_cpu_timer(struct k_itimer *timer,
 112                                  union cpu_time_count now)
 113{
 114        int i;
 115
 116        if (timer->it.cpu.incr.sched == 0)
 117                return;
 118
 119        if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
 120                unsigned long long delta, incr;
 121
 122                if (now.sched < timer->it.cpu.expires.sched)
 123                        return;
 124                incr = timer->it.cpu.incr.sched;
 125                delta = now.sched + incr - timer->it.cpu.expires.sched;
 126                /* Don't use (incr*2 < delta), incr*2 might overflow. */
 127                for (i = 0; incr < delta - incr; i++)
 128                        incr = incr << 1;
 129                for (; i >= 0; incr >>= 1, i--) {
 130                        if (delta < incr)
 131                                continue;
 132                        timer->it.cpu.expires.sched += incr;
 133                        timer->it_overrun += 1 << i;
 134                        delta -= incr;
 135                }
 136        } else {
 137                cputime_t delta, incr;
 138
 139                if (now.cpu < timer->it.cpu.expires.cpu)
 140                        return;
 141                incr = timer->it.cpu.incr.cpu;
 142                delta = now.cpu + incr - timer->it.cpu.expires.cpu;
 143                /* Don't use (incr*2 < delta), incr*2 might overflow. */
 144                for (i = 0; incr < delta - incr; i++)
 145                             incr += incr;
 146                for (; i >= 0; incr = incr >> 1, i--) {
 147                        if (delta < incr)
 148                                continue;
 149                        timer->it.cpu.expires.cpu += incr;
 150                        timer->it_overrun += 1 << i;
 151                        delta -= incr;
 152                }
 153        }
 154}
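/*
 * Worked example of the doubling/halving scheme above (numbers made up for
 * illustration): with expires = 10, incr = 3 and now = 25, delta starts at
 * 25 + 3 - 10 = 18.  The first loop doubles incr to 12 with i = 2; the
 * second loop then adds 12 (it_overrun += 4) and 6 (it_overrun += 2) and
 * skips 3, leaving expires = 28 > now with it_overrun increased by 6 --
 * the same result as stepping the timer forward six periods one at a time,
 * but in O(log) iterations.
 */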
 155
 156static inline cputime_t prof_ticks(struct task_struct *p)
 157{
 158        return p->utime + p->stime;
 159}
 160static inline cputime_t virt_ticks(struct task_struct *p)
 161{
 162        return p->utime;
 163}
 164
 165static int
 166posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 167{
 168        int error = check_clock(which_clock);
 169        if (!error) {
 170                tp->tv_sec = 0;
 171                tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
 172                if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 173                        /*
 174                         * If sched_clock is using a cycle counter, we
 175                         * have no idea of its true resolution here, but
 176                         * it is certainly much finer than 1s/HZ.
 177                         */
 178                        tp->tv_nsec = 1;
 179                }
 180        }
 181        return error;
 182}
 183
 184static int
 185posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 186{
 187        /*
 188         * You can never reset a CPU clock, but we check for other errors
 189         * in the call before failing with EPERM.
 190         */
 191        int error = check_clock(which_clock);
 192        if (error == 0) {
 193                error = -EPERM;
 194        }
 195        return error;
 196}
 197
 198
 199/*
 200 * Sample a per-thread clock for the given task.
 201 */
 202static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 203                            union cpu_time_count *cpu)
 204{
 205        switch (CPUCLOCK_WHICH(which_clock)) {
 206        default:
 207                return -EINVAL;
 208        case CPUCLOCK_PROF:
 209                cpu->cpu = prof_ticks(p);
 210                break;
 211        case CPUCLOCK_VIRT:
 212                cpu->cpu = virt_ticks(p);
 213                break;
 214        case CPUCLOCK_SCHED:
 215                cpu->sched = task_sched_runtime(p);
 216                break;
 217        }
 218        return 0;
 219}
 220
 221static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 222{
 223        if (b->utime > a->utime)
 224                a->utime = b->utime;
 225
 226        if (b->stime > a->stime)
 227                a->stime = b->stime;
 228
 229        if (b->sum_exec_runtime > a->sum_exec_runtime)
 230                a->sum_exec_runtime = b->sum_exec_runtime;
 231}
 232
 233void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 234{
 235        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 236        struct task_cputime sum;
 237        unsigned long flags;
 238
 239        if (!cputimer->running) {
 240                /*
 241                 * The POSIX timer interface allows for absolute time expiry
 242                 * values through the TIMER_ABSTIME flag, therefore we have
 243                 * to synchronize the timer to the clock every time we start
 244                 * it.
 245                 */
 246                thread_group_cputime(tsk, &sum);
 247                raw_spin_lock_irqsave(&cputimer->lock, flags);
 248                cputimer->running = 1;
 249                update_gt_cputime(&cputimer->cputime, &sum);
 250        } else
 251                raw_spin_lock_irqsave(&cputimer->lock, flags);
 252        *times = cputimer->cputime;
 253        raw_spin_unlock_irqrestore(&cputimer->lock, flags);
 254}
 255
 256/*
 257 * Sample a process (thread group) clock for the given group_leader task.
 258 * Must be called with tasklist_lock held for reading.
 259 */
 260static int cpu_clock_sample_group(const clockid_t which_clock,
 261                                  struct task_struct *p,
 262                                  union cpu_time_count *cpu)
 263{
 264        struct task_cputime cputime;
 265
 266        switch (CPUCLOCK_WHICH(which_clock)) {
 267        default:
 268                return -EINVAL;
 269        case CPUCLOCK_PROF:
 270                thread_group_cputime(p, &cputime);
 271                cpu->cpu = cputime.utime + cputime.stime;
 272                break;
 273        case CPUCLOCK_VIRT:
 274                thread_group_cputime(p, &cputime);
 275                cpu->cpu = cputime.utime;
 276                break;
 277        case CPUCLOCK_SCHED:
 278                thread_group_cputime(p, &cputime);
 279                cpu->sched = cputime.sum_exec_runtime;
 280                break;
 281        }
 282        return 0;
 283}
 284
 285
 286static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 287{
 288        const pid_t pid = CPUCLOCK_PID(which_clock);
 289        int error = -EINVAL;
 290        union cpu_time_count rtn;
 291
 292        if (pid == 0) {
 293                /*
 294                 * Special case constant value for our own clocks.
 295                 * We don't have to do any lookup to find ourselves.
 296                 */
 297                if (CPUCLOCK_PERTHREAD(which_clock)) {
 298                        /*
 299                         * Sampling just ourselves we can do with no locking.
 300                         */
 301                        error = cpu_clock_sample(which_clock,
 302                                                 current, &rtn);
 303                } else {
 304                        read_lock(&tasklist_lock);
 305                        error = cpu_clock_sample_group(which_clock,
 306                                                       current, &rtn);
 307                        read_unlock(&tasklist_lock);
 308                }
 309        } else {
 310                /*
 311                 * Find the given PID, and validate that the caller
 312                 * should be able to see it.
 313                 */
 314                struct task_struct *p;
 315                rcu_read_lock();
 316                p = find_task_by_vpid(pid);
 317                if (p) {
 318                        if (CPUCLOCK_PERTHREAD(which_clock)) {
 319                                if (same_thread_group(p, current)) {
 320                                        error = cpu_clock_sample(which_clock,
 321                                                                 p, &rtn);
 322                                }
 323                        } else {
 324                                read_lock(&tasklist_lock);
 325                                if (thread_group_leader(p) && p->sighand) {
 326                                        error =
 327                                            cpu_clock_sample_group(which_clock,
 328                                                                   p, &rtn);
 329                                }
 330                                read_unlock(&tasklist_lock);
 331                        }
 332                }
 333                rcu_read_unlock();
 334        }
 335
 336        if (error)
 337                return error;
 338        sample_to_timespec(which_clock, rtn, tp);
 339        return 0;
 340}
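/*
 * Illustrative user-space counterpart of posix_cpu_clock_get() (standard
 * POSIX calls, not part of this file): clock_getcpuclockid() builds a CPU
 * clockid for an arbitrary PID, and clock_gettime() on it ends up in the
 * lookup-and-sample path above.
 *
 *	#include <time.h>
 *	#include <stdio.h>
 *
 *	struct timespec ts;
 *	clockid_t clk;
 *
 *	if (clock_getcpuclockid(pid, &clk) == 0 &&
 *	    clock_gettime(clk, &ts) == 0)
 *		printf("%ld.%09ld s of CPU time\n",
 *		       (long) ts.tv_sec, ts.tv_nsec);
 */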
 341
 342
 343/*
 344 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 345 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
 346 * new timer already all-zeros initialized.
 347 */
 348static int posix_cpu_timer_create(struct k_itimer *new_timer)
 349{
 350        int ret = 0;
 351        const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
 352        struct task_struct *p;
 353
 354        if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
 355                return -EINVAL;
 356
 357        INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 358
 359        rcu_read_lock();
 360        if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
 361                if (pid == 0) {
 362                        p = current;
 363                } else {
 364                        p = find_task_by_vpid(pid);
 365                        if (p && !same_thread_group(p, current))
 366                                p = NULL;
 367                }
 368        } else {
 369                if (pid == 0) {
 370                        p = current->group_leader;
 371                } else {
 372                        p = find_task_by_vpid(pid);
 373                        if (p && !has_group_leader_pid(p))
 374                                p = NULL;
 375                }
 376        }
 377        new_timer->it.cpu.task = p;
 378        if (p) {
 379                get_task_struct(p);
 380        } else {
 381                ret = -EINVAL;
 382        }
 383        rcu_read_unlock();
 384
 385        return ret;
 386}
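/*
 * Illustrative sketch of the user-space side of posix_cpu_timer_create()
 * (standard POSIX timer API; the signal choice is arbitrary):
 *
 *	#include <signal.h>
 *	#include <time.h>
 *
 *	timer_t tid;
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGRTMIN,
 *	};
 *
 *	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid) != 0)
 *		perror("timer_create");
 */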
 387
 388/*
 389 * Clean up a CPU-clock timer that is about to be destroyed.
 390 * This is called from timer deletion with the timer already locked.
 391 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 392 * and try again.  (This happens when the timer is in the middle of firing.)
 393 */
 394static int posix_cpu_timer_del(struct k_itimer *timer)
 395{
 396        struct task_struct *p = timer->it.cpu.task;
 397        int ret = 0;
 398
 399        if (likely(p != NULL)) {
 400                read_lock(&tasklist_lock);
 401                if (unlikely(p->sighand == NULL)) {
 402                        /*
 403                         * We raced with the reaping of the task.
 404                         * The deletion should have cleared us off the list.
 405                         */
 406                        BUG_ON(!list_empty(&timer->it.cpu.entry));
 407                } else {
 408                        spin_lock(&p->sighand->siglock);
 409                        if (timer->it.cpu.firing)
 410                                ret = TIMER_RETRY;
 411                        else
 412                                list_del(&timer->it.cpu.entry);
 413                        spin_unlock(&p->sighand->siglock);
 414                }
 415                read_unlock(&tasklist_lock);
 416
 417                if (!ret)
 418                        put_task_struct(p);
 419        }
 420
 421        return ret;
 422}
 423
 424/*
 425 * Clean out CPU timers still ticking when a thread exits.  The task
 426 * pointer is cleared, and the expiry time is replaced with the residual
 427 * time for later timer_gettime calls to return.
 428 * This must be called with the siglock held.
 429 */
 430static void cleanup_timers(struct list_head *head,
 431                           cputime_t utime, cputime_t stime,
 432                           unsigned long long sum_exec_runtime)
 433{
 434        struct cpu_timer_list *timer, *next;
 435        cputime_t ptime = utime + stime;
 436
 437        list_for_each_entry_safe(timer, next, head, entry) {
 438                list_del_init(&timer->entry);
 439                if (timer->expires.cpu < ptime) {
 440                        timer->expires.cpu = 0;
 441                } else {
 442                        timer->expires.cpu -= ptime;
 443                }
 444        }
 445
 446        ++head;
 447        list_for_each_entry_safe(timer, next, head, entry) {
 448                list_del_init(&timer->entry);
 449                if (timer->expires.cpu < utime) {
 450                        timer->expires.cpu = 0;
 451                } else {
 452                        timer->expires.cpu -= utime;
 453                }
 454        }
 455
 456        ++head;
 457        list_for_each_entry_safe(timer, next, head, entry) {
 458                list_del_init(&timer->entry);
 459                if (timer->expires.sched < sum_exec_runtime) {
 460                        timer->expires.sched = 0;
 461                } else {
 462                        timer->expires.sched -= sum_exec_runtime;
 463                }
 464        }
 465}
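/*
 * The three passes above walk head, head + 1 and head + 2, i.e. the
 * CPUCLOCK_PROF, CPUCLOCK_VIRT and CPUCLOCK_SCHED lists in that order
 * (matching the array indexing done in arm_timer() below), and convert
 * each pending expiry from an absolute sample to the time remaining.
 */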
 466
 467/*
 468 * These are both called with the siglock held, when the current thread
 469 * is being reaped.  When the final (leader) thread in the group is reaped,
 470 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 471 */
 472void posix_cpu_timers_exit(struct task_struct *tsk)
 473{
 474        add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 475                                                sizeof(unsigned long long));
 476        cleanup_timers(tsk->cpu_timers,
 477                       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
 478
 479}
 480void posix_cpu_timers_exit_group(struct task_struct *tsk)
 481{
 482        struct signal_struct *const sig = tsk->signal;
 483
 484        cleanup_timers(tsk->signal->cpu_timers,
 485                       tsk->utime + sig->utime, tsk->stime + sig->stime,
 486                       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 487}
 488
 489static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 490{
 491        /*
 492         * That's all for this thread or process.
 493         * We leave our residual in expires to be reported.
 494         */
 495        put_task_struct(timer->it.cpu.task);
 496        timer->it.cpu.task = NULL;
 497        timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
 498                                             timer->it.cpu.expires,
 499                                             now);
 500}
 501
 502static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 503{
 504        return expires == 0 || expires > new_exp;
 505}
 506
 507/*
 508 * Insert the timer on the appropriate list before any timers that
 509 * expire later.  This must be called with the tasklist_lock held
 510 * for reading, interrupts disabled and p->sighand->siglock taken.
 511 */
 512static void arm_timer(struct k_itimer *timer)
 513{
 514        struct task_struct *p = timer->it.cpu.task;
 515        struct list_head *head, *listpos;
 516        struct task_cputime *cputime_expires;
 517        struct cpu_timer_list *const nt = &timer->it.cpu;
 518        struct cpu_timer_list *next;
 519
 520        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 521                head = p->cpu_timers;
 522                cputime_expires = &p->cputime_expires;
 523        } else {
 524                head = p->signal->cpu_timers;
 525                cputime_expires = &p->signal->cputime_expires;
 526        }
 527        head += CPUCLOCK_WHICH(timer->it_clock);
 528
 529        listpos = head;
 530        list_for_each_entry(next, head, entry) {
 531                if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
 532                        break;
 533                listpos = &next->entry;
 534        }
 535        list_add(&nt->entry, listpos);
 536
 537        if (listpos == head) {
 538                union cpu_time_count *exp = &nt->expires;
 539
 540                /*
 541                 * We are the new earliest-expiring POSIX 1.b timer, hence we
 542                 * need to update the expiration cache. Take into account that
 543                 * for process timers we share expiration cache with itimers
 544                 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
 545                 */
 546
 547                switch (CPUCLOCK_WHICH(timer->it_clock)) {
 548                case CPUCLOCK_PROF:
 549                        if (expires_gt(cputime_expires->prof_exp, exp->cpu))
 550                                cputime_expires->prof_exp = exp->cpu;
 551                        break;
 552                case CPUCLOCK_VIRT:
 553                        if (expires_gt(cputime_expires->virt_exp, exp->cpu))
 554                                cputime_expires->virt_exp = exp->cpu;
 555                        break;
 556                case CPUCLOCK_SCHED:
 557                        if (cputime_expires->sched_exp == 0 ||
 558                            cputime_expires->sched_exp > exp->sched)
 559                                cputime_expires->sched_exp = exp->sched;
 560                        break;
 561                }
 562        }
 563}
 564
 565/*
 566 * The timer is locked, fire it and arrange for its reload.
 567 */
 568static void cpu_timer_fire(struct k_itimer *timer)
 569{
 570        if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 571                /*
 572                 * The user doesn't want any signal.
 573                 */
 574                timer->it.cpu.expires.sched = 0;
 575        } else if (unlikely(timer->sigq == NULL)) {
 576                /*
 577                 * This is a special case for clock_nanosleep,
 578                 * not a normal timer from sys_timer_create.
 579                 */
 580                wake_up_process(timer->it_process);
 581                timer->it.cpu.expires.sched = 0;
 582        } else if (timer->it.cpu.incr.sched == 0) {
 583                /*
 584                 * One-shot timer.  Clear it as soon as it's fired.
 585                 */
 586                posix_timer_event(timer, 0);
 587                timer->it.cpu.expires.sched = 0;
 588        } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
 589                /*
 590                 * The signal did not get queued because the signal
 591                 * was ignored, so we won't get any callback to
 592                 * reload the timer.  But we need to keep it
 593                 * ticking in case the signal is deliverable next time.
 594                 */
 595                posix_cpu_timer_schedule(timer);
 596        }
 597}
 598
 599/*
 600 * Sample a process (thread group) timer for the given group_leader task.
 601 * Must be called with tasklist_lock held for reading.
 602 */
 603static int cpu_timer_sample_group(const clockid_t which_clock,
 604                                  struct task_struct *p,
 605                                  union cpu_time_count *cpu)
 606{
 607        struct task_cputime cputime;
 608
 609        thread_group_cputimer(p, &cputime);
 610        switch (CPUCLOCK_WHICH(which_clock)) {
 611        default:
 612                return -EINVAL;
 613        case CPUCLOCK_PROF:
 614                cpu->cpu = cputime.utime + cputime.stime;
 615                break;
 616        case CPUCLOCK_VIRT:
 617                cpu->cpu = cputime.utime;
 618                break;
 619        case CPUCLOCK_SCHED:
 620                cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 621                break;
 622        }
 623        return 0;
 624}
 625
 626/*
 627 * Guts of sys_timer_settime for CPU timers.
 628 * This is called with the timer locked and interrupts disabled.
 629 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 630 * and try again.  (This happens when the timer is in the middle of firing.)
 631 */
 632static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 633                               struct itimerspec *new, struct itimerspec *old)
 634{
 635        struct task_struct *p = timer->it.cpu.task;
 636        union cpu_time_count old_expires, new_expires, old_incr, val;
 637        int ret;
 638
 639        if (unlikely(p == NULL)) {
 640                /*
 641                 * Timer refers to a dead task's clock.
 642                 */
 643                return -ESRCH;
 644        }
 645
 646        new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 647
 648        read_lock(&tasklist_lock);
 649        /*
 650         * We need the tasklist_lock to protect against reaping that
 651         * clears p->sighand.  If p has just been reaped, we can no
 652         * longer get any information about it at all.
 653         */
 654        if (unlikely(p->sighand == NULL)) {
 655                read_unlock(&tasklist_lock);
 656                put_task_struct(p);
 657                timer->it.cpu.task = NULL;
 658                return -ESRCH;
 659        }
 660
 661        /*
 662         * Disarm any old timer after extracting its expiry time.
 663         */
 664        BUG_ON(!irqs_disabled());
 665
 666        ret = 0;
 667        old_incr = timer->it.cpu.incr;
 668        spin_lock(&p->sighand->siglock);
 669        old_expires = timer->it.cpu.expires;
 670        if (unlikely(timer->it.cpu.firing)) {
 671                timer->it.cpu.firing = -1;
 672                ret = TIMER_RETRY;
 673        } else
 674                list_del_init(&timer->it.cpu.entry);
 675
 676        /*
 677         * We need to sample the current time to convert the new
 678         * value from relative to absolute, and to convert the
 679         * old value from absolute to relative.  To set a process
 680         * timer, we need a sample to balance the thread expiry
 681         * times (in arm_timer).  With an absolute time, we must
 682         * check if it's already passed.  In short, we need a sample.
 683         */
 684        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 685                cpu_clock_sample(timer->it_clock, p, &val);
 686        } else {
 687                cpu_timer_sample_group(timer->it_clock, p, &val);
 688        }
 689
 690        if (old) {
 691                if (old_expires.sched == 0) {
 692                        old->it_value.tv_sec = 0;
 693                        old->it_value.tv_nsec = 0;
 694                } else {
 695                        /*
 696                         * Update the timer in case it has
 697                         * overrun already.  If it has,
 698                         * we'll report it as having overrun
 699                         * and with the next reloaded timer
 700                         * already ticking, though we are
 701                         * swallowing that pending
 702                         * notification here to install the
 703                         * new setting.
 704                         */
 705                        bump_cpu_timer(timer, val);
 706                        if (cpu_time_before(timer->it_clock, val,
 707                                            timer->it.cpu.expires)) {
 708                                old_expires = cpu_time_sub(
 709                                        timer->it_clock,
 710                                        timer->it.cpu.expires, val);
 711                                sample_to_timespec(timer->it_clock,
 712                                                   old_expires,
 713                                                   &old->it_value);
 714                        } else {
 715                                old->it_value.tv_nsec = 1;
 716                                old->it_value.tv_sec = 0;
 717                        }
 718                }
 719        }
 720
 721        if (unlikely(ret)) {
 722                /*
 723                 * We are colliding with the timer actually firing.
 724                 * Punt after filling in the timer's old value, and
 725                 * disable this firing since we are already reporting
 726                 * it as an overrun (thanks to bump_cpu_timer above).
 727                 */
 728                spin_unlock(&p->sighand->siglock);
 729                read_unlock(&tasklist_lock);
 730                goto out;
 731        }
 732
 733        if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
 734                cpu_time_add(timer->it_clock, &new_expires, val);
 735        }
 736
 737        /*
 738         * Install the new expiry time (or zero).
 739         * For a timer with no notification action, we don't actually
 740         * arm the timer (we'll just fake it for timer_gettime).
 741         */
 742        timer->it.cpu.expires = new_expires;
 743        if (new_expires.sched != 0 &&
 744            cpu_time_before(timer->it_clock, val, new_expires)) {
 745                arm_timer(timer);
 746        }
 747
 748        spin_unlock(&p->sighand->siglock);
 749        read_unlock(&tasklist_lock);
 750
 751        /*
 752         * Install the new reload setting, and
 753         * set up the signal and overrun bookkeeping.
 754         */
 755        timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
 756                                                &new->it_interval);
 757
 758        /*
 759         * This acts as a modification timestamp for the timer,
 760         * so any automatic reload attempt will punt on seeing
 761         * that we have reset the timer manually.
 762         */
 763        timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
 764                ~REQUEUE_PENDING;
 765        timer->it_overrun_last = 0;
 766        timer->it_overrun = -1;
 767
 768        if (new_expires.sched != 0 &&
 769            !cpu_time_before(timer->it_clock, val, new_expires)) {
 770                /*
 771                 * The designated time already passed, so we notify
 772                 * immediately, even if the thread never runs to
 773                 * accumulate more time on this clock.
 774                 */
 775                cpu_timer_fire(timer);
 776        }
 777
 778        ret = 0;
 779 out:
 780        if (old) {
 781                sample_to_timespec(timer->it_clock,
 782                                   old_incr, &old->it_interval);
 783        }
 784        return ret;
 785}
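/*
 * Illustrative user-space counterpart of posix_cpu_timer_set() (standard
 * POSIX calls; the values are arbitrary): arm a CPU-time timer (timer_t tid
 * obtained from timer_create()) for 250ms of CPU time, reloading every
 * 250ms.
 *
 *	struct itimerspec its = {
 *		.it_value    = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 },
 *		.it_interval = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 },
 *	};
 *
 *	if (timer_settime(tid, 0, &its, NULL) != 0)
 *		perror("timer_settime");
 */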
 786
 787static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 788{
 789        union cpu_time_count now;
 790        struct task_struct *p = timer->it.cpu.task;
 791        int clear_dead;
 792
 793        /*
 794         * Easy part: convert the reload time.
 795         */
 796        sample_to_timespec(timer->it_clock,
 797                           timer->it.cpu.incr, &itp->it_interval);
 798
 799        if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all.  */
 800                itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 801                return;
 802        }
 803
 804        if (unlikely(p == NULL)) {
 805                /*
 806                 * This task already died and the timer will never fire.
 807                 * In this case, expires is actually the dead value.
 808                 */
 809        dead:
 810                sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
 811                                   &itp->it_value);
 812                return;
 813        }
 814
 815        /*
 816         * Sample the clock to take the difference with the expiry time.
 817         */
 818        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 819                cpu_clock_sample(timer->it_clock, p, &now);
 820                clear_dead = p->exit_state;
 821        } else {
 822                read_lock(&tasklist_lock);
 823                if (unlikely(p->sighand == NULL)) {
 824                        /*
 825                         * The process has been reaped.
 826                         * We can't even collect a sample any more.
 827                         * Call the timer disarmed, nothing else to do.
 828                         */
 829                        put_task_struct(p);
 830                        timer->it.cpu.task = NULL;
 831                        timer->it.cpu.expires.sched = 0;
 832                        read_unlock(&tasklist_lock);
 833                        goto dead;
 834                } else {
 835                        cpu_timer_sample_group(timer->it_clock, p, &now);
 836                        clear_dead = (unlikely(p->exit_state) &&
 837                                      thread_group_empty(p));
 838                }
 839                read_unlock(&tasklist_lock);
 840        }
 841
 842        if (unlikely(clear_dead)) {
 843                /*
 844                 * We've noticed that the thread is dead, but
 845                 * not yet reaped.  Take this opportunity to
 846                 * drop our task ref.
 847                 */
 848                clear_dead_task(timer, now);
 849                goto dead;
 850        }
 851
 852        if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
 853                sample_to_timespec(timer->it_clock,
 854                                   cpu_time_sub(timer->it_clock,
 855                                                timer->it.cpu.expires, now),
 856                                   &itp->it_value);
 857        } else {
 858                /*
 859                 * The timer should have expired already, but the firing
 860                 * hasn't taken place yet.  Say it's just about to expire.
 861                 */
 862                itp->it_value.tv_nsec = 1;
 863                itp->it_value.tv_sec = 0;
 864        }
 865}
 866
 867/*
 868 * Check for any per-thread CPU timers that have fired and move them off
 869 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
 870 * tsk->cputime_expires values to reflect the remaining thread CPU timers.
 871 */
 872static void check_thread_timers(struct task_struct *tsk,
 873                                struct list_head *firing)
 874{
 875        int maxfire;
 876        struct list_head *timers = tsk->cpu_timers;
 877        struct signal_struct *const sig = tsk->signal;
 878        unsigned long soft;
 879
 880        maxfire = 20;
 881        tsk->cputime_expires.prof_exp = 0;
 882        while (!list_empty(timers)) {
 883                struct cpu_timer_list *t = list_first_entry(timers,
 884                                                      struct cpu_timer_list,
 885                                                      entry);
 886                if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
 887                        tsk->cputime_expires.prof_exp = t->expires.cpu;
 888                        break;
 889                }
 890                t->firing = 1;
 891                list_move_tail(&t->entry, firing);
 892        }
 893
 894        ++timers;
 895        maxfire = 20;
 896        tsk->cputime_expires.virt_exp = 0;
 897        while (!list_empty(timers)) {
 898                struct cpu_timer_list *t = list_first_entry(timers,
 899                                                      struct cpu_timer_list,
 900                                                      entry);
 901                if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
 902                        tsk->cputime_expires.virt_exp = t->expires.cpu;
 903                        break;
 904                }
 905                t->firing = 1;
 906                list_move_tail(&t->entry, firing);
 907        }
 908
 909        ++timers;
 910        maxfire = 20;
 911        tsk->cputime_expires.sched_exp = 0;
 912        while (!list_empty(timers)) {
 913                struct cpu_timer_list *t = list_first_entry(timers,
 914                                                      struct cpu_timer_list,
 915                                                      entry);
 916                if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
 917                        tsk->cputime_expires.sched_exp = t->expires.sched;
 918                        break;
 919                }
 920                t->firing = 1;
 921                list_move_tail(&t->entry, firing);
 922        }
 923
 924        /*
 925         * Check for the special case thread timers.
 926         */
 927        soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
 928        if (soft != RLIM_INFINITY) {
 929                unsigned long hard =
 930                        ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
 931
 932                if (hard != RLIM_INFINITY &&
 933                    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
 934                        /*
 935                         * At the hard limit, we just die.
 936                         * No need to calculate anything else now.
 937                         */
 938                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 939                        return;
 940                }
 941                if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
 942                        /*
 943                         * At the soft limit, send a SIGXCPU every second.
 944                         */
 945                        if (soft < hard) {
 946                                soft += USEC_PER_SEC;
 947                                sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 948                        }
 949                        printk(KERN_INFO
 950                                "RT Watchdog Timeout: %s[%d]\n",
 951                                tsk->comm, task_pid_nr(tsk));
 952                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 953                }
 954        }
 955}
 956
 957static void stop_process_timers(struct signal_struct *sig)
 958{
 959        struct thread_group_cputimer *cputimer = &sig->cputimer;
 960        unsigned long flags;
 961
 962        raw_spin_lock_irqsave(&cputimer->lock, flags);
 963        cputimer->running = 0;
 964        raw_spin_unlock_irqrestore(&cputimer->lock, flags);
 965}
 966
 967static u32 onecputick;
 968
 969static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 970                             cputime_t *expires, cputime_t cur_time, int signo)
 971{
 972        if (!it->expires)
 973                return;
 974
 975        if (cur_time >= it->expires) {
 976                if (it->incr) {
 977                        it->expires += it->incr;
 978                        it->error += it->incr_error;
 979                        if (it->error >= onecputick) {
 980                                it->expires -= cputime_one_jiffy;
 981                                it->error -= onecputick;
 982                        }
 983                } else {
 984                        it->expires = 0;
 985                }
 986
 987                trace_itimer_expire(signo == SIGPROF ?
 988                                    ITIMER_PROF : ITIMER_VIRTUAL,
 989                                    tsk->signal->leader_pid, cur_time);
 990                __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 991        }
 992
 993        if (it->expires && (!*expires || it->expires < *expires)) {
 994                *expires = it->expires;
 995        }
 996}
 997
 998/**
 999 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1000 *
1001 * @cputime:    The struct to compare.
1002 *
1003 * Checks @cputime to see if all fields are zero.  Returns true if all fields
1004 * are zero, false if any field is nonzero.
1005 */
1006static inline int task_cputime_zero(const struct task_cputime *cputime)
1007{
1008        if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
1009                return 1;
1010        return 0;
1011}
1012
1013/*
1014 * Check for any process-wide CPU timers that have fired and move them
1015 * off the tsk->signal->cpu_timers[N] lists onto the firing list.
1016 * Per-thread timers have already been taken off.
1017 */
1018static void check_process_timers(struct task_struct *tsk,
1019                                 struct list_head *firing)
1020{
1021        int maxfire;
1022        struct signal_struct *const sig = tsk->signal;
1023        cputime_t utime, ptime, virt_expires, prof_expires;
1024        unsigned long long sum_sched_runtime, sched_expires;
1025        struct list_head *timers = sig->cpu_timers;
1026        struct task_cputime cputime;
1027        unsigned long soft;
1028
1029        /*
1030         * Collect the current process totals.
1031         */
1032        thread_group_cputimer(tsk, &cputime);
1033        utime = cputime.utime;
1034        ptime = utime + cputime.stime;
1035        sum_sched_runtime = cputime.sum_exec_runtime;
1036        maxfire = 20;
1037        prof_expires = 0;
1038        while (!list_empty(timers)) {
1039                struct cpu_timer_list *tl = list_first_entry(timers,
1040                                                      struct cpu_timer_list,
1041                                                      entry);
1042                if (!--maxfire || ptime < tl->expires.cpu) {
1043                        prof_expires = tl->expires.cpu;
1044                        break;
1045                }
1046                tl->firing = 1;
1047                list_move_tail(&tl->entry, firing);
1048        }
1049
1050        ++timers;
1051        maxfire = 20;
1052        virt_expires = 0;
1053        while (!list_empty(timers)) {
1054                struct cpu_timer_list *tl = list_first_entry(timers,
1055                                                      struct cpu_timer_list,
1056                                                      entry);
1057                if (!--maxfire || utime < tl->expires.cpu) {
1058                        virt_expires = tl->expires.cpu;
1059                        break;
1060                }
1061                tl->firing = 1;
1062                list_move_tail(&tl->entry, firing);
1063        }
1064
1065        ++timers;
1066        maxfire = 20;
1067        sched_expires = 0;
1068        while (!list_empty(timers)) {
1069                struct cpu_timer_list *tl = list_first_entry(timers,
1070                                                      struct cpu_timer_list,
1071                                                      entry);
1072                if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
1073                        sched_expires = tl->expires.sched;
1074                        break;
1075                }
1076                tl->firing = 1;
1077                list_move_tail(&tl->entry, firing);
1078        }
1079
1080        /*
1081         * Check for the special case process timers.
1082         */
1083        check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
1084                         SIGPROF);
1085        check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
1086                         SIGVTALRM);
1087        soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
1088        if (soft != RLIM_INFINITY) {
1089                unsigned long psecs = cputime_to_secs(ptime);
1090                unsigned long hard =
1091                        ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
1092                cputime_t x;
1093                if (psecs >= hard) {
1094                        /*
1095                         * At the hard limit, we just die.
1096                         * No need to calculate anything else now.
1097                         */
1098                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1099                        return;
1100                }
1101                if (psecs >= soft) {
1102                        /*
1103                         * At the soft limit, send a SIGXCPU every second.
1104                         */
1105                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1106                        if (soft < hard) {
1107                                soft++;
1108                                sig->rlim[RLIMIT_CPU].rlim_cur = soft;
1109                        }
1110                }
1111                x = secs_to_cputime(soft);
1112                if (!prof_expires || x < prof_expires) {
1113                        prof_expires = x;
1114                }
1115        }
1116
1117        sig->cputime_expires.prof_exp = prof_expires;
1118        sig->cputime_expires.virt_exp = virt_expires;
1119        sig->cputime_expires.sched_exp = sched_expires;
1120        if (task_cputime_zero(&sig->cputime_expires))
1121                stop_process_timers(sig);
1122}
1123
1124/*
1125 * This is called from the signal code (via do_schedule_next_timer)
1126 * when the last timer signal was delivered and we have to reload the timer.
1127 */
1128void posix_cpu_timer_schedule(struct k_itimer *timer)
1129{
1130        struct task_struct *p = timer->it.cpu.task;
1131        union cpu_time_count now;
1132
1133        if (unlikely(p == NULL))
1134                /*
1135                 * The task was cleaned up already, no future firings.
1136                 */
1137                goto out;
1138
1139        /*
1140         * Fetch the current sample and update the timer's expiry time.
1141         */
1142        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1143                cpu_clock_sample(timer->it_clock, p, &now);
1144                bump_cpu_timer(timer, now);
1145                if (unlikely(p->exit_state)) {
1146                        clear_dead_task(timer, now);
1147                        goto out;
1148                }
1149                read_lock(&tasklist_lock); /* arm_timer needs it.  */
1150                spin_lock(&p->sighand->siglock);
1151        } else {
1152                read_lock(&tasklist_lock);
1153                if (unlikely(p->sighand == NULL)) {
1154                        /*
1155                         * The process has been reaped.
1156                         * We can't even collect a sample any more.
1157                         */
1158                        put_task_struct(p);
1159                        timer->it.cpu.task = p = NULL;
1160                        timer->it.cpu.expires.sched = 0;
1161                        goto out_unlock;
1162                } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1163                        /*
1164                         * We've noticed that the thread is dead, but
1165                         * not yet reaped.  Take this opportunity to
1166                         * drop our task ref.
1167                         */
1168                        clear_dead_task(timer, now);
1169                        goto out_unlock;
1170                }
1171                spin_lock(&p->sighand->siglock);
1172                cpu_timer_sample_group(timer->it_clock, p, &now);
1173                bump_cpu_timer(timer, now);
1174                /* Leave the tasklist_lock locked for the call below.  */
1175        }
1176
1177        /*
1178         * Now re-arm for the new expiry time.
1179         */
1180        BUG_ON(!irqs_disabled());
1181        arm_timer(timer);
1182        spin_unlock(&p->sighand->siglock);
1183
1184out_unlock:
1185        read_unlock(&tasklist_lock);
1186
1187out:
1188        timer->it_overrun_last = timer->it_overrun;
1189        timer->it_overrun = -1;
1190        ++timer->it_requeue_pending;
1191}
1192
1193/**
1194 * task_cputime_expired - Compare two task_cputime entities.
1195 *
1196 * @sample:     The task_cputime structure to be checked for expiration.
1197 * @expires:    Expiration times, against which @sample will be checked.
1198 *
1199 * Checks @sample against @expires to see if any field of @sample has expired.
1200 * Returns true if any nonzero field of @expires is less than or equal to
1201 * the corresponding field of @sample.  Otherwise returns false.
1202 */
1203static inline int task_cputime_expired(const struct task_cputime *sample,
1204                                        const struct task_cputime *expires)
1205{
1206        if (expires->utime && sample->utime >= expires->utime)
1207                return 1;
1208        if (expires->stime && sample->utime + sample->stime >= expires->stime)
1209                return 1;
1210        if (expires->sum_exec_runtime != 0 &&
1211            sample->sum_exec_runtime >= expires->sum_exec_runtime)
1212                return 1;
1213        return 0;
1214}
1215
1216/**
1217 * fastpath_timer_check - POSIX CPU timers fast path.
1218 *
1219 * @tsk:        The task (thread) being checked.
1220 *
1221 * Check the task and thread group timers.  If both are zero (there are no
1222 * timers set) return false.  Otherwise snapshot the task and thread group
1223 * timers and compare them with the corresponding expiration times.  Return
1224 * true if a timer has expired, else return false.
1225 */
1226static inline int fastpath_timer_check(struct task_struct *tsk)
1227{
1228        struct signal_struct *sig;
1229
1230        if (!task_cputime_zero(&tsk->cputime_expires)) {
1231                struct task_cputime task_sample = {
1232                        .utime = tsk->utime,
1233                        .stime = tsk->stime,
1234                        .sum_exec_runtime = tsk->se.sum_exec_runtime
1235                };
1236
1237                if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1238                        return 1;
1239        }
1240
1241        sig = tsk->signal;
1242        if (sig->cputimer.running) {
1243                struct task_cputime group_sample;
1244
1245                raw_spin_lock(&sig->cputimer.lock);
1246                group_sample = sig->cputimer.cputime;
1247                raw_spin_unlock(&sig->cputimer.lock);
1248
1249                if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1250                        return 1;
1251        }
1252
1253        return 0;
1254}
1255
1256/*
1257 * This is called from the timer interrupt handler.  The irq handler has
1258 * already updated our counts.  We need to check if any timers fire now.
1259 * Interrupts are disabled.
1260 */
1261void run_posix_cpu_timers(struct task_struct *tsk)
1262{
1263        LIST_HEAD(firing);
1264        struct k_itimer *timer, *next;
1265        unsigned long flags;
1266
1267        BUG_ON(!irqs_disabled());
1268
1269        /*
1270         * The fast path checks that there are no expired thread or thread
1271         * group timers.  If that's so, just return.
1272         */
1273        if (!fastpath_timer_check(tsk))
1274                return;
1275
1276        if (!lock_task_sighand(tsk, &flags))
1277                return;
1278        /*
1279         * Here we take all the timers that are firing off the
1280         * tsk->signal->cpu_timers[N] and tsk->cpu_timers[N] lists
1281         * and put them on the firing list.
1282         */
1283        check_thread_timers(tsk, &firing);
1284        /*
1285         * If there are any active process-wide timers (POSIX 1.b, itimers,
1286         * RLIMIT_CPU), the cputimer must be running.
1287         */
1288        if (tsk->signal->cputimer.running)
1289                check_process_timers(tsk, &firing);
1290
1291        /*
1292         * We must release these locks before taking any timer's lock.
1293         * There is a potential race with timer deletion here, as the
1294         * siglock now protects our private firing list.  We have set
1295         * the firing flag in each timer, so that a deletion attempt
1296         * that gets the timer lock before we do will give it up and
1297         * spin until we've taken care of that timer below.
1298         */
1299        unlock_task_sighand(tsk, &flags);
1300
1301        /*
1302         * Now that all the timers on our list have the firing flag,
1303         * no one will touch their list entries but us.  We'll take
1304         * each timer's lock before clearing its firing flag, so no
1305         * timer call will interfere.
1306         */
1307        list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1308                int cpu_firing;
1309
1310                spin_lock(&timer->it_lock);
1311                list_del_init(&timer->it.cpu.entry);
1312                cpu_firing = timer->it.cpu.firing;
1313                timer->it.cpu.firing = 0;
1314                /*
1315                 * The firing flag is -1 if we collided with a reset
1316                 * of the timer, which already reported this
1317                 * almost-firing as an overrun.  So don't generate an event.
1318                 */
1319                if (likely(cpu_firing >= 0))
1320                        cpu_timer_fire(timer);
1321                spin_unlock(&timer->it_lock);
1322        }
1323}
1324
1325/*
1326 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1327 * The tsk->sighand->siglock must be held by the caller.
1328 */
1329void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1330                           cputime_t *newval, cputime_t *oldval)
1331{
1332        union cpu_time_count now;
1333
1334        BUG_ON(clock_idx == CPUCLOCK_SCHED);
1335        cpu_timer_sample_group(clock_idx, tsk, &now);
1336
1337        if (oldval) {
1338                /*
1339                 * We are setting an itimer.  The *oldval value is absolute and
1340                 * we update it to be relative; the *newval argument is relative
1341                 * and we update it to be absolute.
1342                 */
1343                if (*oldval) {
1344                        if (*oldval <= now.cpu) {
1345                                /* Just about to fire. */
1346                                *oldval = cputime_one_jiffy;
1347                        } else {
1348                                *oldval -= now.cpu;
1349                        }
1350                }
1351
1352                if (!*newval)
1353                        return;
1354                *newval += now.cpu;
1355        }
1356
1357        /*
1358         * Update the expiration cache if we are now the earliest timer, or if
1359         * the new RLIMIT_CPU limit is earlier than the cached prof_exp expiry.
1360         */
1361        switch (clock_idx) {
1362        case CPUCLOCK_PROF:
1363                if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1364                        tsk->signal->cputime_expires.prof_exp = *newval;
1365                break;
1366        case CPUCLOCK_VIRT:
1367                if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1368                        tsk->signal->cputime_expires.virt_exp = *newval;
1369                break;
1370        }
1371}
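/*
 * Worked example for the conversion above (illustrative numbers): if the
 * group sample now.cpu is 50 ticks, a previously armed absolute *oldval of
 * 80 is reported back as the relative value 30, while a new relative
 * *newval of 100 is stored as the absolute expiry 150 before the
 * expiration cache is updated.
 */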
1372
1373static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1374                            struct timespec *rqtp, struct itimerspec *it)
1375{
1376        struct k_itimer timer;
1377        int error;
1378
1379        /*
1380         * Set up a temporary timer and then wait for it to go off.
1381         */
1382        memset(&timer, 0, sizeof timer);
1383        spin_lock_init(&timer.it_lock);
1384        timer.it_clock = which_clock;
1385        timer.it_overrun = -1;
1386        error = posix_cpu_timer_create(&timer);
1387        timer.it_process = current;
1388        if (!error) {
1389                static struct itimerspec zero_it;
1390
1391                memset(it, 0, sizeof *it);
1392                it->it_value = *rqtp;
1393
1394                spin_lock_irq(&timer.it_lock);
1395                error = posix_cpu_timer_set(&timer, flags, it, NULL);
1396                if (error) {
1397                        spin_unlock_irq(&timer.it_lock);
1398                        return error;
1399                }
1400
1401                while (!signal_pending(current)) {
1402                        if (timer.it.cpu.expires.sched == 0) {
1403                                /*
1404                                 * Our timer fired and was reset; the
1405                                 * deletion below cannot fail.
1406                                 */
1407                                posix_cpu_timer_del(&timer);
1408                                spin_unlock_irq(&timer.it_lock);
1409                                return 0;
1410                        }
1411
1412                        /*
1413                         * Block until cpu_timer_fire (or a signal) wakes us.
1414                         */
1415                        __set_current_state(TASK_INTERRUPTIBLE);
1416                        spin_unlock_irq(&timer.it_lock);
1417                        schedule();
1418                        spin_lock_irq(&timer.it_lock);
1419                }
1420
1421                /*
1422                 * We were interrupted by a signal.
1423                 */
1424                sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1425                error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
1426                if (!error) {
1427                        /*
1428                         * The timer is now unarmed; deletion cannot fail.
1429                         */
1430                        posix_cpu_timer_del(&timer);
1431                }
1432                spin_unlock_irq(&timer.it_lock);
1433
1434                while (error == TIMER_RETRY) {
1435                        /*
1436                         * We need to handle the case when the timer was or is
1437                         * in the middle of firing. In all other cases we have
1438                         * already freed its resources.
1439                         */
1440                        spin_lock_irq(&timer.it_lock);
1441                        error = posix_cpu_timer_del(&timer);
1442                        spin_unlock_irq(&timer.it_lock);
1443                }
1444
1445                if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1446                        /*
1447                         * It actually did fire already.
1448                         */
1449                        return 0;
1450                }
1451
1452                error = -ERESTART_RESTARTBLOCK;
1453        }
1454
1455        return error;
1456}
1457
1458static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1459
1460static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1461                            struct timespec *rqtp, struct timespec __user *rmtp)
1462{
1463        struct restart_block *restart_block =
1464                &current_thread_info()->restart_block;
1465        struct itimerspec it;
1466        int error;
1467
1468        /*
1469         * Diagnose required errors first.
1470         */
1471        if (CPUCLOCK_PERTHREAD(which_clock) &&
1472            (CPUCLOCK_PID(which_clock) == 0 ||
1473             CPUCLOCK_PID(which_clock) == current->pid))
1474                return -EINVAL;
1475
1476        error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1477
1478        if (error == -ERESTART_RESTARTBLOCK) {
1479
1480                if (flags & TIMER_ABSTIME)
1481                        return -ERESTARTNOHAND;
1482                /*
1483                 * Report back to the user the time still remaining.
1484                 */
1485                if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1486                        return -EFAULT;
1487
1488                restart_block->fn = posix_cpu_nsleep_restart;
1489                restart_block->nanosleep.clockid = which_clock;
1490                restart_block->nanosleep.rmtp = rmtp;
1491                restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1492        }
1493        return error;
1494}
1495
1496static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1497{
1498        clockid_t which_clock = restart_block->nanosleep.clockid;
1499        struct timespec t;
1500        struct itimerspec it;
1501        int error;
1502
1503        t = ns_to_timespec(restart_block->nanosleep.expires);
1504
1505        error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1506
1507        if (error == -ERESTART_RESTARTBLOCK) {
1508                struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1509                /*
1510                 * Report back to the user the time still remaining.
1511                 */
1512                if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1513                        return -EFAULT;
1514
1515                restart_block->nanosleep.expires = timespec_to_ns(&t);
1516        }
1517        return error;
1518
1519}
1520
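/*
 * Userspace-side sketch (an assumption for illustration, not code from
 * this file) of the sleep path implemented above: a relative
 * clock_nanosleep() on the process CPU-time clock, resuming with the
 * remaining time when a handled signal interrupts the sleep.  Note that
 * the process clock only advances while some thread of the process is
 * running, so this pattern is only useful when other threads keep
 * consuming CPU time.
 */
#include <errno.h>
#include <time.h>

static int sleep_one_cpu_second(void)
{
        struct timespec req = { .tv_sec = 1, .tv_nsec = 0 };
        struct timespec rem;
        int err;

        /* clock_nanosleep() returns the error number directly. */
        while ((err = clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0,
                                      &req, &rem)) == EINTR)
                req = rem;      /* remaining CPU time reported via rmtp */

        return err;
}
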
1521#define PROCESS_CLOCK   MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1522#define THREAD_CLOCK    MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1523
1524static int process_cpu_clock_getres(const clockid_t which_clock,
1525                                    struct timespec *tp)
1526{
1527        return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1528}
1529static int process_cpu_clock_get(const clockid_t which_clock,
1530                                 struct timespec *tp)
1531{
1532        return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1533}
1534static int process_cpu_timer_create(struct k_itimer *timer)
1535{
1536        timer->it_clock = PROCESS_CLOCK;
1537        return posix_cpu_timer_create(timer);
1538}
1539static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1540                              struct timespec *rqtp,
1541                              struct timespec __user *rmtp)
1542{
1543        return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1544}
1545static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1546{
1547        return -EINVAL;
1548}
1549static int thread_cpu_clock_getres(const clockid_t which_clock,
1550                                   struct timespec *tp)
1551{
1552        return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1553}
1554static int thread_cpu_clock_get(const clockid_t which_clock,
1555                                struct timespec *tp)
1556{
1557        return posix_cpu_clock_get(THREAD_CLOCK, tp);
1558}
1559static int thread_cpu_timer_create(struct k_itimer *timer)
1560{
1561        timer->it_clock = THREAD_CLOCK;
1562        return posix_cpu_timer_create(timer);
1563}
1564
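/*
 * The THREAD_CLOCK wrappers above back the predefined
 * CLOCK_THREAD_CPUTIME_ID constant registered below.  A minimal
 * userspace sketch (illustrative only): reading the calling thread's
 * consumed CPU time in nanoseconds.
 */
#include <time.h>

static long long thread_cpu_ns(void)
{
        struct timespec ts;

        if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0)
                return -1;
        return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}
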
1565struct k_clock clock_posix_cpu = {
1566        .clock_getres   = posix_cpu_clock_getres,
1567        .clock_set      = posix_cpu_clock_set,
1568        .clock_get      = posix_cpu_clock_get,
1569        .timer_create   = posix_cpu_timer_create,
1570        .nsleep         = posix_cpu_nsleep,
1571        .nsleep_restart = posix_cpu_nsleep_restart,
1572        .timer_set      = posix_cpu_timer_set,
1573        .timer_del      = posix_cpu_timer_del,
1574        .timer_get      = posix_cpu_timer_get,
1575};
1576
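/*
 * clock_posix_cpu handles the dynamically encoded CPU clock ids, such as
 * those returned by clock_getcpuclockid() or pthread_getcpuclockid().
 * A minimal userspace sketch (illustrative only): reading another
 * process's consumed CPU time through such a clock id.
 */
#include <stdio.h>
#include <sys/types.h>
#include <time.h>

static int print_cpu_time_of(pid_t pid)
{
        clockid_t cid;
        struct timespec ts;

        if (clock_getcpuclockid(pid, &cid) != 0)
                return -1;
        if (clock_gettime(cid, &ts) != 0)
                return -1;

        printf("pid %ld used %ld.%09ld s of CPU time\n",
               (long)pid, (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}
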
1577static __init int init_posix_cpu_timers(void)
1578{
1579        struct k_clock process = {
1580                .clock_getres   = process_cpu_clock_getres,
1581                .clock_get      = process_cpu_clock_get,
1582                .timer_create   = process_cpu_timer_create,
1583                .nsleep         = process_cpu_nsleep,
1584                .nsleep_restart = process_cpu_nsleep_restart,
1585        };
1586        struct k_clock thread = {
1587                .clock_getres   = thread_cpu_clock_getres,
1588                .clock_get      = thread_cpu_clock_get,
1589                .timer_create   = thread_cpu_timer_create,
1590        };
1591        struct timespec ts;
1592
1593        posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1594        posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1595
1596        cputime_to_timespec(cputime_one_jiffy, &ts);
1597        onecputick = ts.tv_nsec;
1598        WARN_ON(ts.tv_sec != 0);
1599
1600        return 0;
1601}
1602__initcall(init_posix_cpu_timers);
1603
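/*
 * With the clocks registered above, a per-process CPU-time timer can be
 * created from userspace.  A minimal sketch (illustrative only; link
 * with -lrt on older glibc): deliver SIGPROF every 10ms of CPU time
 * consumed by the whole process.
 */
#include <signal.h>
#include <time.h>

static int arm_cpu_profiling_timer(timer_t *tid)
{
        struct sigevent sev = {
                .sigev_notify = SIGEV_SIGNAL,
                .sigev_signo  = SIGPROF,
        };
        struct itimerspec its = {
                .it_value    = { .tv_nsec = 10 * 1000 * 1000 },
                .it_interval = { .tv_nsec = 10 * 1000 * 1000 },
        };

        /* Served by process_cpu_timer_create() via CLOCK_PROCESS_CPUTIME_ID. */
        if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, tid) != 0)
                return -1;
        return timer_settime(*tid, 0, &its, NULL);
}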