linux/kernel/sched/cputime.c
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include "sched.h"


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering the percpu hardirq/softirq time counters.
 * They are only modified in vtime_account(), on the corresponding CPU,
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * Another CPU may therefore read this CPU's irq time while it races with
 * irq/vtime_account() on this CPU. The reader gets either the old or the
 * new value, with the side effect that a slice of irq time may be
 * accounted to the wrong task when an irq is in progress while we read
 * rq->clock. That is a worthy compromise compared to taking a lock on
 * every irq in account_system_time(). (See the read/write helper sketch
 * below.)
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
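
/*
 * Reference sketch (not part of this file): the irq_time_write_begin/end()
 * and irq_time_read() helpers used below live in kernel/sched/sched.h.
 * On 32-bit, where a u64 read can tear, they bracket updates with the
 * per-cpu seqcount defined above; on 64-bit they degrade to plain
 * accesses. The exact code in sched.h may differ; this only illustrates
 * the scheme.
 */
#if 0	/* illustration only */
#ifndef CONFIG_64BIT
static inline void irq_time_write_begin(void)
{
        __this_cpu_inc(irq_time_seq.sequence);
        smp_wmb();
}

static inline void irq_time_write_end(void)
{
        smp_wmb();
        __this_cpu_inc(irq_time_seq.sequence);
}

static inline u64 irq_time_read(int cpu)
{
        u64 irq_time;
        unsigned seq;

        do {
                seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
                irq_time = per_cpu(cpu_softirq_time, cpu) +
                           per_cpu(cpu_hardirq_time, cpu);
        } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

        return irq_time;
}
#else /* CONFIG_64BIT */
static inline void irq_time_write_begin(void) { }
static inline void irq_time_write_end(void) { }

static inline u64 irq_time_read(int cpu)
{
        return per_cpu(cpu_softirq_time, cpu) +
               per_cpu(cpu_hardirq_time, cpu);
}
#endif /* CONFIG_64BIT */
#endif	/* illustration only */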

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void vtime_account(struct task_struct *curr)
{
        unsigned long flags;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);

        irq_time_write_begin();
        /*
         * We do not account softirq time spent in ksoftirqd here.
         * We want that time to keep being accounted to the ksoftirqd
         * thread itself, so as not to confuse the scheduler with a
         * special task that does not consume any time but still wants
         * to run.
         */
        if (hardirq_count())
                __this_cpu_add(cpu_hardirq_time, delta);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                __this_cpu_add(cpu_softirq_time, delta);

        irq_time_write_end();
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account);
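
/*
 * Illustration (not part of this file): the hooks that call vtime_account()
 * live in the hardirq/softirq entry and exit paths (see
 * include/linux/hardirq.h and kernel/softirq.c); the exact macros vary
 * between kernel versions. The hardirq case looks roughly like the sketch
 * below. The important property, as the comment above notes, is that the
 * hook runs before the preempt count is raised on entry and before it is
 * lowered on exit, so hardirq_count() and in_serving_softirq() still
 * describe the context whose time slice just ended.
 */
#if 0	/* illustration only */
#define __irq_enter()                                   \
        do {                                            \
                vtime_account(current);                 \
                add_preempt_count(HARDIRQ_OFFSET);      \
                trace_hardirq_enter();                  \
        } while (0)

#define __irq_exit()                                    \
        do {                                            \
                trace_hardirq_exit();                   \
                vtime_account(current);                 \
                sub_preempt_count(HARDIRQ_OFFSET);      \
        } while (0)
#endif	/* illustration only */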

static int irqtime_account_hi_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_hardirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

static int irqtime_account_si_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_softirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
        struct kernel_cpustat *kcpustat;
        struct cpuacct *ca;
#endif
        /*
         * Since every update is sure to touch the root cgroup, we
         * go ahead and touch it first. If the root cgroup is the
         * only cgroup, then nothing else should be necessary.
         */
        __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
        if (unlikely(!cpuacct_subsys.active))
                return;

        rcu_read_lock();
        ca = task_ca(p);
        while (ca && (ca != &root_cpuacct)) {
                kcpustat = this_cpu_ptr(ca->cpustat);
                kcpustat->cpustat[index] += tmp;
                ca = parent_ca(ca);
        }
        rcu_read_unlock();
#endif
}
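
/*
 * Illustration (not part of this file): the per-cpu kernel_cpustat fields
 * updated above are what eventually shows up in /proc/stat. A reader such
 * as fs/proc/stat.c sums them over all CPUs, roughly as in the abbreviated,
 * hypothetical sketch below (field set and formatting differ by version).
 */
#if 0	/* illustration only */
static void sum_cpustat_example(void)
{
        u64 user = 0, nice = 0, system = 0, idle = 0;
        int i;

        for_each_possible_cpu(i) {
                user   += kcpustat_cpu(i).cpustat[CPUTIME_USER];
                nice   += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
                system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
                idle   += kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
        }
        /* ... convert to clock ticks and print, as /proc/stat does ... */
}
#endif	/* illustration only */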

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
                       cputime_t cputime_scaled)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);

        index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_update_integrals(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
                               cputime_t cputime_scaled)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (TASK_NICE(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
                        cputime_t cputime_scaled, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        p->stimescaled += cputime_scaled;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_update_integrals(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime, cputime_t cputime_scaled)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime, cputime_scaled);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal, st = 0;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                st = steal_ticks(steal);
                this_rq()->prev_steal_time += st * TICK_NSEC;

                account_steal_time(st);
                return st;
        }
#endif
        return false;
}
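
/*
 * Reference sketch (not part of this file): steal_ticks() converts the raw
 * nanoseconds of steal time reported by the hypervisor into whole ticks.
 * It lives in kernel/sched/sched.h; roughly as below (illustration, the
 * exact code may differ).
 */
#if 0	/* illustration only */
static inline u64 steal_ticks(u64 steal)
{
        if (unlikely(steal > NSEC_PER_SEC))
                return div_u64(steal, TICK_NSEC);

        return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
}
#endif	/* illustration only */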

#ifndef CONFIG_VIRT_CPU_ACCOUNTING

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done both for system and user time, as there
 * is no timer going off while we are in a hardirq and hence we may never
 * get an opportunity to update it solely on a system tick.
 * p->stime and friends are only updated on system time, not on irq or
 * softirq time, as those no longer count in task exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        if (steal_account_process_tick())
                return;

        if (irqtime_account_hi_update()) {
                cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
        } else if (irqtime_account_si_update()) {
                cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                        CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else if (p == rq->idle) {
                account_idle_time(cputime_one_jiffy);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else {
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                        CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        int i;
        struct rq *rq = this_rq();

        for (i = 0; i < ticks; i++)
                irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        struct rq *rq = this_rq();

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq);
                return;
        }

        if (steal_account_process_tick())
                return;

        if (user_tick)
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
                                    one_jiffy_scaled);
        else
                account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
        account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        account_idle_time(jiffies_to_cputime(ticks));
}

#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}

void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * give idle time a different meaning (s390 only includes the
 * time spent by the CPU when it's in low power mode) must
 * override vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);

        if (in_interrupt() || !is_idle_task(tsk))
                vtime_account_system(tsk);
        else
                vtime_account_idle(tsk);

        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
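
/*
 * Illustration (not part of this file): an architecture that defines
 * __ARCH_HAS_VTIME_ACCOUNT supplies its own vtime_account(), typically
 * because it decides between system and idle time from hardware state
 * rather than from in_interrupt()/is_idle_task(). A purely hypothetical
 * override could look like the sketch below; arch_cpu_in_low_power() is
 * made up for the illustration and real arch code handles more cases.
 */
#if 0	/* illustration only */
void vtime_account(struct task_struct *tsk)
{
        if (arch_cpu_in_low_power())	/* hypothetical arch helper */
                vtime_account_idle(tsk);
        else
                vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif	/* illustration only */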

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)      nsecs_to_jiffies(__nsecs)
#endif

static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
        u64 temp = (__force u64) rtime;

        temp *= (__force u64) utime;

        if (sizeof(cputime_t) == 4)
                temp = div_u64(temp, (__force u32) total);
        else
                temp = div64_u64(temp, (__force u64) total);

        return (__force cputime_t) temp;
}
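
/*
 * Worked example (illustration): with sampled utime = 2 and stime = 6
 * ticks but an actual rtime (sum_exec_runtime) of 4 ticks, total = 8 and
 * the scaled utime is 2 * 4 / 8 = 1 tick; task_times() below then derives
 * stime as rtime - utime = 3 ticks. The tick-sampled utime/stime split is
 * thus rescaled so that it adds up to the precise CFS runtime.
 */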

void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, utime = p->utime, total = utime + p->stime;

        /*
         * Use CFS's precise accounting:
         */
        rtime = nsecs_to_cputime(p->se.sum_exec_runtime);

        if (total)
                utime = scale_utime(utime, rtime, total);
        else
                utime = rtime;

        /*
         * Compare with previous values, to keep monotonicity:
         */
        p->prev_utime = max(p->prev_utime, utime);
        p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);

        *ut = p->prev_utime;
        *st = p->prev_stime;
}
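
/*
 * Worked example (illustration) of why the max() above is needed: suppose
 * a previous call reported prev_utime = 5. If, on the next call, the
 * rescaled utime works out to 4 because the sampled utime/stime ratio
 * shifted even though total runtime grew, reporting 4 would make user
 * time appear to go backwards in tools like top. Clamping with max()
 * keeps both reported values monotonically increasing.
 */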

/*
 * Must be called with siglock held.
 */
void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct signal_struct *sig = p->signal;
        struct task_cputime cputime;
        cputime_t rtime, utime, total;

        thread_group_cputime(p, &cputime);

        total = cputime.utime + cputime.stime;
        rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

        if (total)
                utime = scale_utime(cputime.utime, rtime, total);
        else
                utime = rtime;

        sig->prev_utime = max(sig->prev_utime, utime);
        sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);

        *ut = sig->prev_utime;
        *st = sig->prev_stime;
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */