linux/drivers/cpufreq/cpufreq_conservative.c
/*
 *  drivers/cpufreq/cpufreq_conservative.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *            (C)  2009 Alexander Clouter <alex@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * "dbs" is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names shorter and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define DEF_FREQUENCY_DOWN_THRESHOLD            (20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling interval is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * a transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with a transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in microseconds (us).
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define DEF_SAMPLING_DOWN_FACTOR                (1)
#define MAX_SAMPLING_DOWN_FACTOR                (10)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)
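
/*
 * Worked example (illustrative, assuming HZ=1000 and a hypothetical CPU
 * with a 10 us transition latency): cpufreq_governor_dbs() below computes
 *
 *   latency           = 10000 ns / 1000               = 10 us
 *   min_sampling_rate = max(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10),
 *                           MIN_LATENCY_MULTIPLIER * 10)
 *                     = max(2 * 10000, 1000)          = 20000 us
 *   sampling_rate     = max(min_sampling_rate,
 *                           10 * LATENCY_MULTIPLIER)  = 20000 us
 *
 * i.e. the load is re-evaluated every 20 ms by default on such a system;
 * the exact numbers depend on HZ and the driver's reported latency.
 */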

static void do_dbs_timer(struct work_struct *work);

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        unsigned int down_skip;
        unsigned int requested_freq;
        int cpu;
        unsigned int enable:1;
        /*
         * percpu mutex that serializes governor limit change with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when the user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int sampling_down_factor;
        unsigned int up_threshold;
        unsigned int down_threshold;
        unsigned int ignore_nice;
        unsigned int freq_step;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
        .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
        .ignore_nice = 0,
        .freq_step = 5,
};

static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = jiffies_to_usecs(cur_wall_time);

        return jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, NULL);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);
        else
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}
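
/*
 * Note: get_cpu_idle_time() prefers the tickless (NO_HZ) idle accounting.
 * If get_cpu_idle_time_us() returns -1ULL that accounting is unavailable
 * and we fall back to the jiffy-based estimate above; otherwise iowait
 * time is added so that it counts as idle for the load calculation.
 */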

/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                     void *data)
{
        struct cpufreq_freqs *freq = data;
        struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
                                                        freq->cpu);

        struct cpufreq_policy *policy;

        if (!this_dbs_info->enable)
                return 0;

        policy = this_dbs_info->cur_policy;

        /*
         * we only care if our internally tracked freq moves outside
         * the 'valid' range of frequencies available to us, otherwise
         * we do not change it
         */
        if (this_dbs_info->requested_freq > policy->max
                        || this_dbs_info->requested_freq < policy->min)
                this_dbs_info->requested_freq = freq->new;

        return 0;
}

static struct notifier_block dbs_cpufreq_notifier_block = {
        .notifier_call = dbs_cpufreq_notifier
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct kobject *kobj, struct attribute *attr, char *buf)               \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);
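
/*
 * For illustration, show_one(freq_step, freq_step) above expands to
 * roughly the following (preprocessor output sketch, not additional code):
 *
 *   static ssize_t show_freq_step
 *   (struct kobject *kobj, struct attribute *attr, char *buf)
 *   {
 *           return sprintf(buf, "%u\n", dbs_tuners_ins.freq_step);
 *   }
 *
 * so every tunable gets a read-only sysfs accessor over the shared
 * dbs_tuners_ins structure.
 */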

static ssize_t store_sampling_down_factor(struct kobject *a,
                                          struct attribute *b,
                                          const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
                return -EINVAL;

        dbs_tuners_ins.sampling_down_factor = input;
        return count;
}

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > 100 ||
                        input <= dbs_tuners_ins.down_threshold)
                return -EINVAL;

        dbs_tuners_ins.up_threshold = input;
        return count;
}

static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        /*
         * cannot be lower than 11 otherwise freq will not fall:
         * dbs_check_cpu() only reduces the frequency when the load drops
         * more than 10 points below down_threshold
         */
        if (ret != 1 || input < 11 || input > 100 ||
                        input >= dbs_tuners_ins.up_threshold)
                return -EINVAL;

        dbs_tuners_ins.down_threshold = input;
        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */
                return count;

        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(cs_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
        }
        return count;
}

static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
                               const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 100)
                input = 100;

        /*
         * no need to test here if freq_step is zero as the user might
         * actually want this, they would be crazy though :)
         */
        dbs_tuners_ins.freq_step = input;
        return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(up_threshold);
define_one_global_rw(down_threshold);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(freq_step);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &sampling_down_factor.attr,
        &up_threshold.attr,
        &down_threshold.attr,
        &ignore_nice_load.attr,
        &freq_step.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "conservative",
};
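
/*
 * With the attribute group named "conservative" registered on
 * cpufreq_global_kobject (see cpufreq_governor_dbs() below), these
 * tunables typically appear under
 * /sys/devices/system/cpu/cpufreq/conservative/, e.g.
 * .../conservative/up_threshold and .../conservative/freq_step.
 */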

/************************** sysfs end ************************/

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int load = 0;
        unsigned int max_load = 0;
        unsigned int freq_target;

        struct cpufreq_policy *policy;
        unsigned int j;

        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate we check: if the current idle time is less
         * than 20% (default, i.e. the load exceeds up_threshold), we try
         * to increase the frequency; if the load drops more than 10
         * points below down_threshold (default 20%), we try to decrease
         * the frequency.
         *
         * Unlike ondemand, both increases and decreases happen in steps
         * of freq_step (default 5%) of the maximum frequency.
         */
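
        /*
         * Worked example with the default tunables on a hypothetical
         * policy whose maximum frequency is 1000000 kHz:
         *
         *   up_threshold = 80, down_threshold = 20, freq_step = 5
         *   freq_target  = 5 * 1000000 / 100 = 50000 kHz
         *
         *   load > 80        -> requested_freq += 50000 (capped at max)
         *   load < 20 - 10   -> requested_freq -= 50000 (floored at min)
         *   otherwise        -> frequency left unchanged
         */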

        /* Get Absolute Load */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;

                j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

                wall_time = (unsigned int)
                        (cur_wall_time - j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int)
                        (cur_idle_time - j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                if (dbs_tuners_ins.ignore_nice) {
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                         j_dbs_info->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies for 32 bit sys
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                if (load > max_load)
                        max_load = load;
        }

        /*
         * break out if we 'cannot' reduce the speed as the user might
         * want freq_step to be zero
         */
        if (dbs_tuners_ins.freq_step == 0)
                return;

        /* Check for frequency increase */
        if (max_load > dbs_tuners_ins.up_threshold) {
                this_dbs_info->down_skip = 0;

                /* if we are already at full speed then break out early */
                if (this_dbs_info->requested_freq == policy->max)
                        return;

                freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

                /* max freq cannot be less than 100. But who knows.... */
                if (unlikely(freq_target == 0))
                        freq_target = 5;

                this_dbs_info->requested_freq += freq_target;
                if (this_dbs_info->requested_freq > policy->max)
                        this_dbs_info->requested_freq = policy->max;

                __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
                        CPUFREQ_RELATION_H);
                return;
        }

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we stay 10 points under the threshold.
         */
        if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
                freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

                this_dbs_info->requested_freq -= freq_target;
                if (this_dbs_info->requested_freq < policy->min)
                        this_dbs_info->requested_freq = policy->min;

                /*
                 * if we cannot reduce the frequency anymore, break out early
                 */
                if (policy->cur == policy->min)
                        return;

                __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
                                CPUFREQ_RELATION_H);
                return;
        }
}

static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;

        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        delay -= jiffies % delay;
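
        /*
         * Example (illustrative): if sampling_rate maps to 10 jiffies and
         * jiffies % 10 == 3, delay becomes 7, so the work fires on the
         * next multiple of 10 jiffies. Every CPU rounds down to the same
         * boundary, which keeps their sampling points aligned.
         */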

        mutex_lock(&dbs_info->timer_mutex);

        dbs_check_cpu(dbs_info);

        schedule_delayed_work_on(cpu, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        delay -= jiffies % delay;

        dbs_info->enable = 1;
        INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
        schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        dbs_info->enable = 0;
        cancel_delayed_work_sync(&dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice)
                                j_dbs_info->prev_cpu_nice =
                                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                }
                this_dbs_info->cpu = cpu;
                this_dbs_info->down_skip = 0;
                this_dbs_info->requested_freq = policy->cur;

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_enable++;
                /*
                 * Set up the global state (sysfs group, sampling rate,
                 * transition notifier) when this governor is used for
                 * the first time
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;
                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /*
                         * conservative does not implement micro-accounting
                         * of idle time like the ondemand governor, thus we
                         * are bound to jiffies/HZ
                         */
                        min_sampling_rate =
                                MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);

                        cpufreq_register_notifier(
                                        &dbs_cpufreq_notifier_block,
                                        CPUFREQ_TRANSITION_NOTIFIER);
                }
                mutex_unlock(&dbs_mutex);

                dbs_timer_init(this_dbs_info);

                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                dbs_enable--;
                mutex_destroy(&this_dbs_info->timer_mutex);

                /*
                 * Stop tracking frequency transitions when the last CPU
                 * using this governor is stopped
                 */
                if (dbs_enable == 0)
                        cpufreq_unregister_notifier(
                                        &dbs_cpufreq_notifier_block,
                                        CPUFREQ_TRANSITION_NOTIFIER);

                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(
                                        this_dbs_info->cur_policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(
                                        this_dbs_info->cur_policy,
                                        policy->min, CPUFREQ_RELATION_L);
                dbs_check_cpu(this_dbs_info);
                mutex_unlock(&this_dbs_info->timer_mutex);

                break;
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
        .name                   = "conservative",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};
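
/*
 * Usage note: once built in or loaded as a module, the governor can
 * typically be selected per policy via the standard cpufreq sysfs
 * interface, e.g.
 *
 *   # echo conservative > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * after which the tunables registered above come into play.
 */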

static int __init cpufreq_gov_dbs_init(void)
{
        return cpufreq_register_governor(&cpufreq_gov_conservative);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_conservative);
}

MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
                "Low Latency Frequency Transition capable processors "
                "optimised for use in a battery environment");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);