linux/arch/arm/kernel/perf_event.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>

#include <asm/irq_regs.h>
#include <asm/pmu.h>

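/*
 * Decode a PERF_TYPE_HW_CACHE config value into a hardware event number
 * using the PMU's cache map. The generic perf encoding packs the cache
 * type, operation and result into the low three bytes of attr.config,
 * e.g. PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) selects L1 data-cache read
 * misses. Entries marked CACHE_OP_UNSUPPORTED yield -ENOENT.
 */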
static int
armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
                                      [PERF_COUNT_HW_CACHE_OP_MAX]
                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
                       u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
        int mapping;

        if (config >= PERF_COUNT_HW_MAX)
                return -EINVAL;

        mapping = (*event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
        return (int)(config & raw_event_mask);
}

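/*
 * Translate a generic perf event (event->attr.type/config) into the
 * PMU-specific event number that gets programmed into the hardware.
 * Events created against this PMU's dynamically allocated type are
 * treated as raw; otherwise the hardware and cache maps supplied by the
 * CPU-specific backend are consulted.
 */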
int
armpmu_map_event(struct perf_event *event,
                 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
                 const unsigned (*cache_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX],
                 u32 raw_event_mask)
{
        u64 config = event->attr.config;
        int type = event->attr.type;

        if (type == event->pmu->type)
                return armpmu_map_raw_event(raw_event_mask, config);

        switch (type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
        }

        return -ENOENT;
}

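/*
 * Program the counter so that it overflows after "left" more events:
 * the counter is written with the low 32 bits of -left, and prev_count
 * is recorded so that armpmu_event_update() can compute the delta
 * later. Returns 1 when a new sample period was started.
 */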
int armpmu_event_set_period(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        /*
         * Limit the maximum period to prevent the counter value
         * from overtaking the one we are about to program. In
         * effect we are reducing max_period to account for
         * interrupt latency (and we are being very conservative).
         */
        if (left > (armpmu->max_period >> 1))
                left = armpmu->max_period >> 1;

        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(event, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

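/*
 * Fold the current hardware counter value into event->count. The
 * cmpxchg() retry loop copes with a concurrent update of prev_count
 * (e.g. from the overflow interrupt), and masking the delta with
 * max_period handles the counter wrapping past the value programmed by
 * armpmu_event_set_period().
 */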
u64 armpmu_event_update(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(event);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
        armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(event);
                armpmu_event_update(event);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void armpmu_start(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event);
        armpmu->enable(event);
}

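/*
 * Remove an event from the PMU: stop it (folding in the final count),
 * release its counter index and let the backend clear any per-counter
 * state it keeps.
 */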
static void
armpmu_del(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        clear_bit(idx, hw_events->used_mask);
        if (armpmu->clear_event_idx)
                armpmu->clear_event_idx(hw_events, event);

        perf_event_update_userpage(event);
}

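/*
 * Schedule an event onto the PMU: ask the backend for a free counter
 * index, mark the event stopped, and start counting immediately if the
 * core passed PERF_EF_START. The whole operation runs with the PMU
 * disabled so a half-programmed counter can never fire.
 */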
static int
armpmu_add(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;

        perf_pmu_disable(event->pmu);

        /* If we don't have space for the counter then finish early. */
        idx = armpmu->get_event_idx(hw_events, event);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(event);
        hw_events->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static int
validate_event(struct pmu_hw_events *hw_events,
               struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);

        if (is_software_event(event))
                return 1;

        if (event->state < PERF_EVENT_STATE_OFF)
                return 1;

        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
                return 1;

        return armpmu->get_event_idx(hw_events, event) >= 0;
}

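/*
 * Check that an event and the rest of its group can all be scheduled on
 * the PMU at the same time, by dry-running the backend's counter
 * allocation against a fake pmu_hw_events.
 */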
static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;

        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
        memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));

        if (!validate_event(&fake_pmu, leader))
                return -EINVAL;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
                if (!validate_event(&fake_pmu, sibling))
                        return -EINVAL;
        }

        if (!validate_event(&fake_pmu, event))
                return -EINVAL;

        return 0;
}

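/*
 * Common PMU interrupt entry point. Platform data may interpose its own
 * handler around the CPU-specific one (e.g. to poke an external
 * interrupt controller first). The handler is timed with sched_clock()
 * and the duration fed to perf_sample_event_took() so the core can
 * throttle the sampling rate if interrupts start taking too long.
 */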
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
        struct arm_pmu *armpmu;
        struct platform_device *plat_device;
        struct arm_pmu_platdata *plat;
        int ret;
        u64 start_clock, finish_clock;

        /*
         * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
         * the handlers expect a struct arm_pmu*. The percpu_irq framework will
         * do any necessary shifting, we just need to perform the first
         * dereference.
         */
        armpmu = *(void **)dev;
        plat_device = armpmu->plat_device;
        plat = dev_get_platdata(&plat_device->dev);

        start_clock = sched_clock();
        if (plat && plat->handle_irq)
                ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
        else
                ret = armpmu->handle_irq(irq, armpmu);
        finish_clock = sched_clock();

        perf_sample_event_took(finish_clock - start_clock);
        return ret;
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
        armpmu->free_irq(armpmu);
        pm_runtime_put_sync(&armpmu->plat_device->dev);
}

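/*
 * Claim the PMU hardware for use: take a runtime PM reference on the
 * platform device and request the PMU interrupt(s) with
 * armpmu_dispatch_irq() as the handler. Undone by
 * armpmu_release_hardware() when the last event goes away.
 */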
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
        int err;
        struct platform_device *pmu_device = armpmu->plat_device;

        if (!pmu_device)
                return -ENODEV;

        pm_runtime_get_sync(&pmu_device->dev);
        err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
        if (err) {
                armpmu_release_hardware(armpmu);
                return err;
        }

        return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        atomic_t *active_events  = &armpmu->active_events;
        struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

        if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
                armpmu_release_hardware(armpmu);
                mutex_unlock(pmu_reserve_mutex);
        }
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
        return attr->exclude_idle || attr->exclude_user ||
               attr->exclude_kernel || attr->exclude_hv;
}

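/*
 * Per-event setup: map the generic event onto a hardware event number,
 * apply any mode-exclusion filter the backend supports, pick a default
 * period of half the counter range for counting (non-sampling) events,
 * and make sure the event's group would still fit on the PMU.
 */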
static int
__hw_perf_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int mapping;

        mapping = armpmu->map_event(event);

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx                = -1;
        hwc->config_base        = 0;
        hwc->config             = 0;
        hwc->event_base         = 0;

        /*
         * Check whether we need to exclude the counter from certain modes.
         */
        if ((!armpmu->set_event_filter ||
             armpmu->set_event_filter(hwc, &event->attr)) &&
             event_requires_mode_exclusion(&event->attr)) {
                pr_debug("ARM performance counters do not support "
                         "mode exclusion\n");
                return -EOPNOTSUPP;
        }

        /*
         * Store the event encoding into the config_base field.
         */
        hwc->config_base            |= (unsigned long)mapping;

        if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
                 * is far less likely to overtake the previous one unless
                 * you have some serious IRQ latency issues.
                 */
                hwc->sample_period  = armpmu->max_period >> 1;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        if (event->group_leader != event) {
                if (validate_group(event) != 0)
                        return -EINVAL;
        }

        return 0;
}

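/*
 * pmu::event_init callback. The first active event reserves the PMU
 * hardware (IRQs plus a runtime PM reference); active_events is
 * refcounted so the hardware is released again from
 * hw_perf_event_destroy() when the last event is freed.
 */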
static int armpmu_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;

        event->destroy = hw_perf_event_destroy;

        if (!atomic_inc_not_zero(active_events)) {
                mutex_lock(&armpmu->reserve_mutex);
                if (atomic_read(active_events) == 0)
                        err = armpmu_reserve_hardware(armpmu);

                if (!err)
                        atomic_inc(active_events);
                mutex_unlock(&armpmu->reserve_mutex);
        }

        if (err)
                return err;

        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);

        return err;
}

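/*
 * pmu::pmu_enable/pmu_disable callbacks. The hardware is only started
 * if at least one counter on this CPU is actually in use.
 */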
static void armpmu_enable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        if (enabled)
                armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        armpmu->stop(armpmu);
}

#ifdef CONFIG_PM
static int armpmu_runtime_resume(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_resume)
                return plat->runtime_resume(dev);

        return 0;
}

static int armpmu_runtime_suspend(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_suspend)
                return plat->runtime_suspend(dev);

        return 0;
}
#endif

const struct dev_pm_ops armpmu_dev_pm_ops = {
        SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
};

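/*
 * Fill in the struct pmu callbacks and register the PMU with the perf
 * core. Backends call armpmu_register() once their arm_pmu is set up;
 * a negative type asks the perf core to allocate a dynamic type id.
 */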
static void armpmu_init(struct arm_pmu *armpmu)
{
        atomic_set(&armpmu->active_events, 0);
        mutex_init(&armpmu->reserve_mutex);

        armpmu->pmu = (struct pmu) {
                .pmu_enable     = armpmu_enable,
                .pmu_disable    = armpmu_disable,
                .event_init     = armpmu_event_init,
                .add            = armpmu_add,
                .del            = armpmu_del,
                .start          = armpmu_start,
                .stop           = armpmu_stop,
                .read           = armpmu_read,
        };
}

int armpmu_register(struct arm_pmu *armpmu, int type)
{
        armpmu_init(armpmu);
        pm_runtime_enable(&armpmu->plat_device->dev);
        pr_info("enabled with %s PMU driver, %d counters available\n",
                        armpmu->name, armpmu->num_events);
        return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
}