linux/arch/x86/events/amd/power.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Performance events - AMD Processor Power Reporting Mechanism
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h"

/* Event code: LSB 8 bits, passed in attr->config; any other bit is reserved. */
#define AMD_POWER_EVENT_MASK            0xFFULL

/*
 * Accumulated power status counters.
 */
#define AMD_POWER_EVENTSEL_PKG          1

/*
 * The ratio of compute unit power accumulator sample period to the
 * PTSC period.
 */
static unsigned int cpu_pwr_sample_ratio;

/* Maximum accumulated power of a compute unit. */
static u64 max_cu_acc_power;

static struct pmu pmu_class;

/*
 * Accumulated power represents the sum of each compute unit's (CU) power
 * consumption. On any core of each CU we read the total accumulated power
 * from MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents the CPU bit map of
 * all cores which are picked to measure the power for the CUs they belong to.
 */
static cpumask_t cpu_mask;
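
/*
 * Illustrative example (topology assumed, not guaranteed): on a Fam15h
 * part where CPUs 0/1, 2/3, 4/5 and 6/7 pair up into compute units,
 * cpu_mask would typically end up with exactly one CPU of each pair set,
 * i.e. a single reader per CU.
 */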

static void event_update(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
        u64 delta, tdelta;

        prev_pwr_acc = hwc->pwr_acc;
        prev_ptsc = hwc->ptsc;
        rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
        rdmsrl(MSR_F15H_PTSC, new_ptsc);

        /*
         * Calculate the CU power consumption over a time period; the unit
         * of the final value (delta) is micro-Watts. Then add it to the
         * event count. The accumulator wraps around at max_cu_acc_power,
         * so account for (at most one) wrap in the subtraction.
         */
        if (new_pwr_acc < prev_pwr_acc) {
                delta = max_cu_acc_power + new_pwr_acc;
                delta -= prev_pwr_acc;
        } else {
                delta = new_pwr_acc - prev_pwr_acc;
        }

        delta *= cpu_pwr_sample_ratio * 1000;
        tdelta = new_ptsc - prev_ptsc;

        do_div(delta, tdelta);
        local64_add(delta, &event->count);
}
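
/*
 * In other words, each event_update() adds
 *
 *   delta_pwr_acc * cpu_pwr_sample_ratio * 1000 / delta_ptsc
 *
 * micro-Watts to the count. Worked example (illustrative numbers only):
 * with a sample ratio of 100, an accumulator delta of 5,000,000 over a
 * PTSC delta of 1,000,000 gives 5,000,000 * 100 * 1000 / 1,000,000 =
 * 500,000 uW, i.e. 0.5 W.
 */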

static void __pmu_event_start(struct perf_event *event)
{
        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        event->hw.state = 0;

        rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
        rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
}

static void pmu_event_start(struct perf_event *event, int mode)
{
        __pmu_event_start(event);
}

static void pmu_event_stop(struct perf_event *event, int mode)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Mark event as deactivated and stopped. */
        if (!(hwc->state & PERF_HES_STOPPED))
                hwc->state |= PERF_HES_STOPPED;

        /* Check if software counter update is necessary. */
        if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
                event_update(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

static int pmu_event_add(struct perf_event *event, int mode)
{
        struct hw_perf_event *hwc = &event->hw;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (mode & PERF_EF_START)
                __pmu_event_start(event);

        return 0;
}

static void pmu_event_del(struct perf_event *event, int flags)
{
        pmu_event_stop(event, PERF_EF_UPDATE);
}

static int pmu_event_init(struct perf_event *event)
{
        u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;

        /* Only look at AMD power events. */
        if (event->attr.type != pmu_class.type)
                return -ENOENT;

        /* Unsupported modes and filters. */
        if (event->attr.sample_period)
                return -EINVAL;

        /* Only the package-level accumulated-power event is supported. */
        if (cfg != AMD_POWER_EVENTSEL_PKG)
                return -EINVAL;

        return 0;
}
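
/*
 * Userspace sketch (illustrative only): opening this event directly via
 * perf_event_open(). The PMU's dynamic type must be read from
 * /sys/bus/event_source/devices/power/type, and sample_period stays 0
 * because sampling is rejected above.
 *
 *   struct perf_event_attr attr = {
 *           .type   = power_pmu_type,             // read from sysfs
 *           .config = 0x01,                       // AMD_POWER_EVENTSEL_PKG
 *           .size   = sizeof(attr),
 *   };
 *   int fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 */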

static void pmu_event_read(struct perf_event *event)
{
        event_update(event);
}

static ssize_t
get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);

static struct attribute *pmu_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group pmu_attr_group = {
        .attrs = pmu_attrs,
};

/*
 * Currently, only reporting the power of each processor/package is
 * supported.
 */
EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");

EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");

/* Convert the count from micro-Watts to milli-Watts. */
EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");

static struct attribute *events_attr[] = {
        EVENT_PTR(power_pkg),
        EVENT_PTR(power_pkg_unit),
        EVENT_PTR(power_pkg_scale),
        NULL,
};

static struct attribute_group pmu_events_group = {
        .name   = "events",
        .attrs  = events_attr,
};
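
/*
 * With the event, unit and scale attributes above, the perf tool can
 * request the counter by name and scale the result to mWatts, e.g.
 * (usage sketch):
 *
 *   $ perf stat -a -e power/power-pkg/ -- sleep 1
 */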

PMU_FORMAT_ATTR(event, "config:0-7");

static struct attribute *formats_attr[] = {
        &format_attr_event.attr,
        NULL,
};

static struct attribute_group pmu_format_group = {
        .name   = "format",
        .attrs  = formats_attr,
};

static const struct attribute_group *attr_groups[] = {
        &pmu_attr_group,
        &pmu_format_group,
        &pmu_events_group,
        NULL,
};

static struct pmu pmu_class = {
        .attr_groups    = attr_groups,
        /* system-wide only */
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = pmu_event_init,
        .add            = pmu_event_add,
        .del            = pmu_event_del,
        .start          = pmu_event_start,
        .stop           = pmu_event_stop,
        .read           = pmu_event_read,
        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        .module         = THIS_MODULE,
};

static int power_cpu_exit(unsigned int cpu)
{
        int target;

        if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
                return 0;

        /*
         * Find a new CPU on the same compute unit, if one is still
         * online, and migrate the events and context over to it.
         */
        target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
        if (target < nr_cpumask_bits) {
                cpumask_set_cpu(target, &cpu_mask);
                perf_pmu_migrate_context(&pmu_class, cpu, target);
        }
        return 0;
}

static int power_cpu_init(unsigned int cpu)
{
        int target;

        /*
         * 1) If any CPU of the same compute unit is already set in
         *    cpu_mask, do nothing.
         * 2) If no CPU of the same compute unit is set in cpu_mask,
         *    set the current (newly onlined) CPU.
         *
         * Note: if there is a CPU aside from the new one already in the
         * sibling mask, then it is also in cpu_mask.
         */
        target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
        if (target >= nr_cpumask_bits)
                cpumask_set_cpu(cpu, &cpu_mask);
        return 0;
}
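
/*
 * Worked example (illustrative topology only): say CPUs 2 and 3 share a
 * compute unit and CPU 2 is in cpu_mask. If CPU 2 goes offline,
 * power_cpu_exit() picks CPU 3 from the sibling mask and migrates the
 * perf context to it; if CPU 2 later comes back online, power_cpu_init()
 * finds CPU 3 already covering the CU and leaves cpu_mask unchanged.
 */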

static const struct x86_cpu_id cpu_match[] = {
        X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
        {},
};

static int __init amd_power_pmu_init(void)
{
        int ret;

        if (!x86_match_cpu(cpu_match))
                return -ENODEV;

        if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
                return -ENODEV;

        /* CPUID Fn8000_0007_ECX reports the CU power sample time ratio. */
        cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);

        if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
                pr_err("Failed to read max compute unit power accumulator MSR\n");
                return -ENODEV;
        }

        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
                          "perf/x86/amd/power:online",
                          power_cpu_init, power_cpu_exit);

        ret = perf_pmu_register(&pmu_class, "power", -1);
        if (WARN_ON(ret)) {
                pr_warn("AMD Power PMU registration failed\n");
                return ret;
        }

        pr_info("AMD Power PMU detected\n");
        return ret;
}
module_init(amd_power_pmu_init);

static void __exit amd_power_pmu_exit(void)
{
        cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE);
        perf_pmu_unregister(&pmu_class);
}
module_exit(amd_power_pmu_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
MODULE_LICENSE("GPL v2");