linux/drivers/cpufreq/cpufreq_governor.c
/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright    (C) 2001 Russell King
 *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *              (C) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/cputime.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/cpu.h>

#include "cpufreq_governor.h"

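/*
 * With per-policy governors, each policy gets its own set of tunables under
 * its kobject; otherwise a single set is shared under the global cpufreq
 * kobject.
 */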
static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
        if (have_governor_per_policy())
                return &policy->kobj;
        else
                return cpufreq_global_kobject;
}

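/* Pick the sysfs attribute group matching where the tunables live. */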
static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
{
        if (have_governor_per_policy())
                return dbs_data->cdata->attr_group_gov_pol;
        else
                return dbs_data->cdata->attr_group_gov_sys;
}

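/*
 * Jiffy-granularity fallback used when NO_HZ idle accounting is unavailable:
 * derive idle time as wall time minus the busy time accumulated in kcpustat.
 * Both the returned idle time and *wall are in microseconds.
 */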
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = cputime_to_usecs(cur_wall_time);

        return cputime_to_usecs(idle_time);
}

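/*
 * Return the cumulative idle time of @cpu in microseconds and, via @wall,
 * the matching wall time.  With @io_busy set, iowait is counted as busy
 * time; otherwise it is added to the idle time.  Falls back to jiffy-based
 * accounting when the nohz idle counters are not available.
 */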
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);
        else if (!io_busy)
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);

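/*
 * Sample the load of every CPU sharing @cpu's policy over the last sampling
 * period and hand the maximum to the governor's gov_check_cpu() callback.
 * For ondemand, the load is additionally scaled by the average frequency.
 */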
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
{
        struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        struct cpufreq_policy *policy;
        unsigned int max_load = 0;
        unsigned int ignore_nice;
        unsigned int j;

        if (dbs_data->cdata->governor == GOV_ONDEMAND)
                ignore_nice = od_tuners->ignore_nice;
        else
                ignore_nice = cs_tuners->ignore_nice;

        policy = cdbs->cur_policy;

        /* Get absolute load (in terms of freq for the ondemand governor) */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_common_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load;
                int io_busy = 0;

                j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);

                /*
                 * For the purposes of ondemand, waiting for disk IO is an
                 * indication that the workload is performance critical, not
                 * that the system is actually idle, so do not add the iowait
                 * time to the cpu idle time.
                 */
                if (dbs_data->cdata->governor == GOV_ONDEMAND)
                        io_busy = od_tuners->io_is_busy;
                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

                wall_time = (unsigned int)
                        (cur_wall_time - j_cdbs->prev_cpu_wall);
                j_cdbs->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int)
                        (cur_idle_time - j_cdbs->prev_cpu_idle);
                j_cdbs->prev_cpu_idle = cur_idle_time;

                if (ignore_nice) {
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                         j_cdbs->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies on 32-bit systems
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_cdbs->prev_cpu_nice =
                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                if (dbs_data->cdata->governor == GOV_ONDEMAND) {
                        int freq_avg = __cpufreq_driver_getavg(policy, j);
                        if (freq_avg <= 0)
                                freq_avg = policy->cur;

                        load *= freq_avg;
                }

                if (load > max_load)
                        max_load = load;
        }

        dbs_data->cdata->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

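/* (Re)queue the CPU's deferrable sampling work to run after @delay jiffies. */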
static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
                unsigned int delay)
{
        struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

        mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
}

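/*
 * Queue sampling work either on the local CPU only or, if @all_cpus is set,
 * on every CPU in the policy, with CPU hotplug held off while the policy's
 * cpumask is walked.
 */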
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
                unsigned int delay, bool all_cpus)
{
        int i;

        if (!all_cpus) {
                __gov_queue_work(smp_processor_id(), dbs_data, delay);
        } else {
                get_online_cpus();
                for_each_cpu(i, policy->cpus)
                        __gov_queue_work(i, dbs_data, delay);
                put_online_cpus();
        }
}
EXPORT_SYMBOL_GPL(gov_queue_work);

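/* Synchronously cancel the pending sampling work of all CPUs in the policy. */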
static inline void gov_cancel_work(struct dbs_data *dbs_data,
                struct cpufreq_policy *policy)
{
        struct cpu_dbs_common_info *cdbs;
        int i;

        for_each_cpu(i, policy->cpus) {
                cdbs = dbs_data->cdata->get_cpu_cdbs(i);
                cancel_delayed_work_sync(&cdbs->work);
        }
}

/* Return true if the CPU load needs to be evaluated again, false otherwise */
bool need_load_eval(struct cpu_dbs_common_info *cdbs,
                unsigned int sampling_rate)
{
        if (policy_is_shared(cdbs->cur_policy)) {
                ktime_t time_now = ktime_get();
                s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);

                /* Do nothing if we have sampled recently */
                if (delta_us < (s64)(sampling_rate / 2))
                        return false;
                else
                        cdbs->time_stamp = time_now;
        }

        return true;
}
EXPORT_SYMBOL_GPL(need_load_eval);

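/* Store the new sampling rate in whichever governor's tuners are in use. */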
static void set_sampling_rate(struct dbs_data *dbs_data,
                unsigned int sampling_rate)
{
        if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
                struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
                cs_tuners->sampling_rate = sampling_rate;
        } else {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;
                od_tuners->sampling_rate = sampling_rate;
        }
}

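/*
 * Common governor backend for ondemand and conservative: POLICY_INIT/EXIT
 * allocate and tear down the (possibly shared) dbs_data and its sysfs group,
 * while START, STOP and LIMITS drive the per-CPU sampling machinery.
 */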
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                struct common_dbs_data *cdata, unsigned int event)
{
        struct dbs_data *dbs_data;
        struct od_cpu_dbs_info_s *od_dbs_info = NULL;
        struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
        struct od_ops *od_ops = NULL;
        struct od_dbs_tuners *od_tuners = NULL;
        struct cs_dbs_tuners *cs_tuners = NULL;
        struct cpu_dbs_common_info *cpu_cdbs;
        unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
        int io_busy = 0;
        int rc;

        if (have_governor_per_policy())
                dbs_data = policy->governor_data;
        else
                dbs_data = cdata->gdbs_data;

        WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT));

        switch (event) {
        case CPUFREQ_GOV_POLICY_INIT:
                if (have_governor_per_policy()) {
                        WARN_ON(dbs_data);
                } else if (dbs_data) {
                        dbs_data->usage_count++;
                        policy->governor_data = dbs_data;
                        return 0;
                }

                dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
                if (!dbs_data) {
                        pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
                        return -ENOMEM;
                }

                dbs_data->cdata = cdata;
                dbs_data->usage_count = 1;
                rc = cdata->init(dbs_data);
                if (rc) {
                        pr_err("%s: POLICY_INIT: init() failed\n", __func__);
                        kfree(dbs_data);
                        return rc;
                }

                rc = sysfs_create_group(get_governor_parent_kobj(policy),
                                get_sysfs_attr(dbs_data));
                if (rc) {
                        cdata->exit(dbs_data);
                        kfree(dbs_data);
                        return rc;
                }

                policy->governor_data = dbs_data;

                /* policy latency is in ns; convert it to us first */
                latency = policy->cpuinfo.transition_latency / 1000;
                if (latency == 0)
                        latency = 1;

                /* Bring kernel and HW constraints together */
                dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
                                MIN_LATENCY_MULTIPLIER * latency);
                set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
                                        latency * LATENCY_MULTIPLIER));

                if ((cdata->governor == GOV_CONSERVATIVE) &&
                                (!policy->governor->initialized)) {
                        struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

                        cpufreq_register_notifier(cs_ops->notifier_block,
                                        CPUFREQ_TRANSITION_NOTIFIER);
                }

                if (!have_governor_per_policy())
                        cdata->gdbs_data = dbs_data;

                return 0;
        case CPUFREQ_GOV_POLICY_EXIT:
                if (!--dbs_data->usage_count) {
                        sysfs_remove_group(get_governor_parent_kobj(policy),
                                        get_sysfs_attr(dbs_data));

                        if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) &&
                                (policy->governor->initialized == 1)) {
                                struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

                                cpufreq_unregister_notifier(cs_ops->notifier_block,
                                                CPUFREQ_TRANSITION_NOTIFIER);
                        }

                        cdata->exit(dbs_data);
                        kfree(dbs_data);
                        cdata->gdbs_data = NULL;
                }

                policy->governor_data = NULL;
                return 0;
        }

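        /*
         * The remaining events operate on per-CPU state, so look up the
         * tuners and the per-CPU dbs info for this policy first.
         */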
        cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

        if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
                cs_tuners = dbs_data->tuners;
                cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
                sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice;
        } else {
                od_tuners = dbs_data->tuners;
                od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
                sampling_rate = od_tuners->sampling_rate;
                ignore_nice = od_tuners->ignore_nice;
                od_ops = dbs_data->cdata->gov_ops;
                io_busy = od_tuners->io_is_busy;
        }

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!policy->cur)
                        return -EINVAL;

                mutex_lock(&dbs_data->mutex);

                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_common_info *j_cdbs =
                                dbs_data->cdata->get_cpu_cdbs(j);

                        j_cdbs->cpu = j;
                        j_cdbs->cur_policy = policy;
                        j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
                                               &j_cdbs->prev_cpu_wall, io_busy);
                        if (ignore_nice)
                                j_cdbs->prev_cpu_nice =
                                        kcpustat_cpu(j).cpustat[CPUTIME_NICE];

                        mutex_init(&j_cdbs->timer_mutex);
                        INIT_DEFERRABLE_WORK(&j_cdbs->work,
                                             dbs_data->cdata->gov_dbs_timer);
                }

                /*
                 * conservative does not implement micro-accounting like the
                 * ondemand governor, thus we are bound to jiffies/HZ
                 */
                if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
                        cs_dbs_info->down_skip = 0;
                        cs_dbs_info->enable = 1;
                        cs_dbs_info->requested_freq = policy->cur;
                } else {
                        od_dbs_info->rate_mult = 1;
                        od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
                        od_ops->powersave_bias_init_cpu(cpu);
                }

                mutex_unlock(&dbs_data->mutex);

                /* Initialize the timer time stamp */
                cpu_cdbs->time_stamp = ktime_get();

                gov_queue_work(dbs_data, policy,
                                delay_for_sampling_rate(sampling_rate), true);
                break;

        case CPUFREQ_GOV_STOP:
                if (dbs_data->cdata->governor == GOV_CONSERVATIVE)
                        cs_dbs_info->enable = 0;

                gov_cancel_work(dbs_data, policy);

                mutex_lock(&dbs_data->mutex);
                mutex_destroy(&cpu_cdbs->timer_mutex);
                mutex_unlock(&dbs_data->mutex);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&cpu_cdbs->timer_mutex);
                if (policy->max < cpu_cdbs->cur_policy->cur)
                        __cpufreq_driver_target(cpu_cdbs->cur_policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > cpu_cdbs->cur_policy->cur)
                        __cpufreq_driver_target(cpu_cdbs->cur_policy,
                                        policy->min, CPUFREQ_RELATION_L);
                dbs_check_cpu(dbs_data, cpu);
                mutex_unlock(&cpu_cdbs->timer_mutex);
                break;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);