linux/arch/x86/events/intel/uncore.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2#include <linux/module.h>
   3
   4#include <asm/cpu_device_id.h>
   5#include <asm/intel-family.h>
   6#include "uncore.h"
   7#include "uncore_discovery.h"
   8
   9static bool uncore_no_discover;
  10module_param(uncore_no_discover, bool, 0);
  11MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
  12                                     "(default: enable the discovery mechanism).");
  13struct intel_uncore_type *empty_uncore[] = { NULL, };
  14struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
  15struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
  16struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
  17
  18static bool pcidrv_registered;
  19struct pci_driver *uncore_pci_driver;
   20/* The PCI driver for devices which the uncore driver doesn't own. */
  21struct pci_driver *uncore_pci_sub_driver;
  22/* pci bus to socket mapping */
  23DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
  24struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
  25struct pci_extra_dev *uncore_extra_pci_dev;
  26int __uncore_max_dies;
  27
  28/* mask of cpus that collect uncore events */
  29static cpumask_t uncore_cpu_mask;
  30
  31/* constraint for the fixed counter */
  32static struct event_constraint uncore_constraint_fixed =
  33        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
  34struct event_constraint uncore_constraint_empty =
  35        EVENT_CONSTRAINT(0, 0, 0);
  36
  37MODULE_LICENSE("GPL");
  38
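/*
 * Translate a PCI bus to the logical die id it belongs to, using the
 * pci2phy map built at init time.  Returns -1 if the bus is not mapped.
 */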
  39int uncore_pcibus_to_dieid(struct pci_bus *bus)
  40{
  41        struct pci2phy_map *map;
  42        int die_id = -1;
  43
  44        raw_spin_lock(&pci2phy_map_lock);
  45        list_for_each_entry(map, &pci2phy_map_head, list) {
  46                if (map->segment == pci_domain_nr(bus)) {
  47                        die_id = map->pbus_to_dieid[bus->number];
  48                        break;
  49                }
  50        }
  51        raw_spin_unlock(&pci2phy_map_lock);
  52
  53        return die_id;
  54}
  55
  56int uncore_die_to_segment(int die)
  57{
  58        struct pci_bus *bus = NULL;
  59
  60        /* Find first pci bus which attributes to specified die. */
  61        while ((bus = pci_find_next_bus(bus)) &&
  62               (die != uncore_pcibus_to_dieid(bus)))
  63                ;
  64
  65        return bus ? pci_domain_nr(bus) : -EINVAL;
  66}
  67
  68static void uncore_free_pcibus_map(void)
  69{
  70        struct pci2phy_map *map, *tmp;
  71
  72        list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
  73                list_del(&map->list);
  74                kfree(map);
  75        }
  76}
  77
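/*
 * Look up (or create) the pci2phy map entry for a PCI segment.  The caller
 * holds pci2phy_map_lock; if an allocation is needed, the lock is dropped
 * around kmalloc() and the list is re-scanned afterwards, so a racing
 * insertion of the same segment is handled and the spare allocation freed.
 */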
  78struct pci2phy_map *__find_pci2phy_map(int segment)
  79{
  80        struct pci2phy_map *map, *alloc = NULL;
  81        int i;
  82
  83        lockdep_assert_held(&pci2phy_map_lock);
  84
  85lookup:
  86        list_for_each_entry(map, &pci2phy_map_head, list) {
  87                if (map->segment == segment)
  88                        goto end;
  89        }
  90
  91        if (!alloc) {
  92                raw_spin_unlock(&pci2phy_map_lock);
  93                alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
  94                raw_spin_lock(&pci2phy_map_lock);
  95
  96                if (!alloc)
  97                        return NULL;
  98
  99                goto lookup;
 100        }
 101
 102        map = alloc;
 103        alloc = NULL;
 104        map->segment = segment;
 105        for (i = 0; i < 256; i++)
 106                map->pbus_to_dieid[i] = -1;
 107        list_add_tail(&map->list, &pci2phy_map_head);
 108
 109end:
 110        kfree(alloc);
 111        return map;
 112}
 113
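/*
 * sysfs "show" handler for the event description attributes under
 * /sys/bus/event_source/devices/uncore_*/events/.  The returned string is
 * the event's raw config description, typically something like
 * "event=0x01,umask=0x01" (the exact encoding is defined per uncore type).
 */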
 114ssize_t uncore_event_show(struct device *dev,
 115                          struct device_attribute *attr, char *buf)
 116{
 117        struct uncore_event_desc *event =
 118                container_of(attr, struct uncore_event_desc, attr);
 119        return sprintf(buf, "%s", event->config);
 120}
 121
 122struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 123{
 124        unsigned int dieid = topology_logical_die_id(cpu);
 125
 126        /*
  127         * The unsigned check also catches the '-1' return value for
  128         * non-existent mappings in the topology map.
 129         */
 130        return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
 131}
 132
 133u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
 134{
 135        u64 count;
 136
 137        rdmsrl(event->hw.event_base, count);
 138
 139        return count;
 140}
 141
 142void uncore_mmio_exit_box(struct intel_uncore_box *box)
 143{
 144        if (box->io_addr)
 145                iounmap(box->io_addr);
 146}
 147
 148u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
 149                             struct perf_event *event)
 150{
 151        if (!box->io_addr)
 152                return 0;
 153
 154        if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
 155                return 0;
 156
 157        return readq(box->io_addr + event->hw.event_base);
 158}
 159
 160/*
 161 * generic get constraint function for shared match/mask registers.
 162 */
 163struct event_constraint *
 164uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
 165{
 166        struct intel_uncore_extra_reg *er;
 167        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 168        struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
 169        unsigned long flags;
 170        bool ok = false;
 171
 172        /*
 173         * reg->alloc can be set due to existing state, so for fake box we
 174         * need to ignore this, otherwise we might fail to allocate proper
 175         * fake state for this extra reg constraint.
 176         */
 177        if (reg1->idx == EXTRA_REG_NONE ||
 178            (!uncore_box_is_fake(box) && reg1->alloc))
 179                return NULL;
 180
 181        er = &box->shared_regs[reg1->idx];
 182        raw_spin_lock_irqsave(&er->lock, flags);
 183        if (!atomic_read(&er->ref) ||
 184            (er->config1 == reg1->config && er->config2 == reg2->config)) {
 185                atomic_inc(&er->ref);
 186                er->config1 = reg1->config;
 187                er->config2 = reg2->config;
 188                ok = true;
 189        }
 190        raw_spin_unlock_irqrestore(&er->lock, flags);
 191
 192        if (ok) {
 193                if (!uncore_box_is_fake(box))
 194                        reg1->alloc = 1;
 195                return NULL;
 196        }
 197
 198        return &uncore_constraint_empty;
 199}
 200
 201void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
 202{
 203        struct intel_uncore_extra_reg *er;
 204        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 205
 206        /*
  207         * Only put the constraint if the extra reg was actually allocated.
  208         * This also takes care of events which do not use an extra shared reg.
 209         *
 210         * Also, if this is a fake box we shouldn't touch any event state
 211         * (reg->alloc) and we don't care about leaving inconsistent box
 212         * state either since it will be thrown out.
 213         */
 214        if (uncore_box_is_fake(box) || !reg1->alloc)
 215                return;
 216
 217        er = &box->shared_regs[reg1->idx];
 218        atomic_dec(&er->ref);
 219        reg1->alloc = 0;
 220}
 221
 222u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
 223{
 224        struct intel_uncore_extra_reg *er;
 225        unsigned long flags;
 226        u64 config;
 227
 228        er = &box->shared_regs[idx];
 229
 230        raw_spin_lock_irqsave(&er->lock, flags);
 231        config = er->config;
 232        raw_spin_unlock_irqrestore(&er->lock, flags);
 233
 234        return config;
 235}
 236
 237static void uncore_assign_hw_event(struct intel_uncore_box *box,
 238                                   struct perf_event *event, int idx)
 239{
 240        struct hw_perf_event *hwc = &event->hw;
 241
 242        hwc->idx = idx;
 243        hwc->last_tag = ++box->tags[idx];
 244
 245        if (uncore_pmc_fixed(hwc->idx)) {
 246                hwc->event_base = uncore_fixed_ctr(box);
 247                hwc->config_base = uncore_fixed_ctl(box);
 248                return;
 249        }
 250
 251        hwc->config_base = uncore_event_ctl(box, hwc->idx);
 252        hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
 253}
 254
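/*
 * Fold the current hardware counter value into event->count.  Shifting both
 * values up by (64 - counter width) and back down computes the delta modulo
 * the counter width, so a counter that wrapped between two reads is still
 * accounted correctly; the local64_xchg() loop retries if prev_count was
 * updated concurrently (e.g. by the polling hrtimer).
 */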
 255void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
 256{
 257        u64 prev_count, new_count, delta;
 258        int shift;
 259
 260        if (uncore_pmc_freerunning(event->hw.idx))
 261                shift = 64 - uncore_freerunning_bits(box, event);
 262        else if (uncore_pmc_fixed(event->hw.idx))
 263                shift = 64 - uncore_fixed_ctr_bits(box);
 264        else
 265                shift = 64 - uncore_perf_ctr_bits(box);
 266
 267        /* the hrtimer might modify the previous event value */
 268again:
 269        prev_count = local64_read(&event->hw.prev_count);
 270        new_count = uncore_read_counter(box, event);
 271        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
 272                goto again;
 273
 274        delta = (new_count << shift) - (prev_count << shift);
 275        delta >>= shift;
 276
 277        local64_add(delta, &event->count);
 278}
 279
 280/*
  281 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
  282 * for SandyBridge, so we use an hrtimer to periodically poll the counters
  283 * to avoid overflow.
 284 */
 285static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
 286{
 287        struct intel_uncore_box *box;
 288        struct perf_event *event;
 289        unsigned long flags;
 290        int bit;
 291
 292        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
 293        if (!box->n_active || box->cpu != smp_processor_id())
 294                return HRTIMER_NORESTART;
 295        /*
  296         * disable local interrupts to prevent uncore_pmu_event_start/stop
  297         * from interrupting the update process
 298         */
 299        local_irq_save(flags);
 300
 301        /*
 302         * handle boxes with an active event list as opposed to active
 303         * counters
 304         */
 305        list_for_each_entry(event, &box->active_list, active_entry) {
 306                uncore_perf_event_update(box, event);
 307        }
 308
 309        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
 310                uncore_perf_event_update(box, box->events[bit]);
 311
 312        local_irq_restore(flags);
 313
 314        hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
 315        return HRTIMER_RESTART;
 316}
 317
 318void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
 319{
 320        hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
 321                      HRTIMER_MODE_REL_PINNED);
 322}
 323
 324void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
 325{
 326        hrtimer_cancel(&box->hrtimer);
 327}
 328
 329static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
 330{
 331        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 332        box->hrtimer.function = uncore_pmu_hrtimer;
 333}
 334
 335static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 336                                                 int node)
 337{
  338        int i, size, numshared = type->num_shared_regs;
 339        struct intel_uncore_box *box;
 340
 341        size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
 342
 343        box = kzalloc_node(size, GFP_KERNEL, node);
 344        if (!box)
 345                return NULL;
 346
 347        for (i = 0; i < numshared; i++)
 348                raw_spin_lock_init(&box->shared_regs[i].lock);
 349
 350        uncore_pmu_init_hrtimer(box);
 351        box->cpu = -1;
 352        box->dieid = -1;
 353
 354        /* set default hrtimer timeout */
 355        box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
 356
 357        INIT_LIST_HEAD(&box->active_list);
 358
 359        return box;
 360}
 361
 362/*
  363 * The uncore_pmu_event_init() pmu event_init callback is used
  364 * as a detection point for uncore events.
 365 */
 366static int uncore_pmu_event_init(struct perf_event *event);
 367
 368static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 369{
 370        return &box->pmu->pmu == event->pmu;
 371}
 372
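/*
 * Collect the leader (and, if @dogrp is set, its active siblings) that
 * belong to this box into box->event_list, bounded by the number of
 * generic counters plus the optional fixed counter.  Returns the new
 * number of collected events, or -EINVAL if the box is full.
 */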
 373static int
 374uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 375                      bool dogrp)
 376{
 377        struct perf_event *event;
 378        int n, max_count;
 379
 380        max_count = box->pmu->type->num_counters;
 381        if (box->pmu->type->fixed_ctl)
 382                max_count++;
 383
 384        if (box->n_events >= max_count)
 385                return -EINVAL;
 386
 387        n = box->n_events;
 388
 389        if (is_box_event(box, leader)) {
 390                box->event_list[n] = leader;
 391                n++;
 392        }
 393
 394        if (!dogrp)
 395                return n;
 396
 397        for_each_sibling_event(event, leader) {
 398                if (!is_box_event(box, event) ||
 399                    event->state <= PERF_EVENT_STATE_OFF)
 400                        continue;
 401
 402                if (n >= max_count)
 403                        return -EINVAL;
 404
 405                box->event_list[n] = event;
 406                n++;
 407        }
 408        return n;
 409}
 410
 411static struct event_constraint *
 412uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
 413{
 414        struct intel_uncore_type *type = box->pmu->type;
 415        struct event_constraint *c;
 416
 417        if (type->ops->get_constraint) {
 418                c = type->ops->get_constraint(box, event);
 419                if (c)
 420                        return c;
 421        }
 422
 423        if (event->attr.config == UNCORE_FIXED_EVENT)
 424                return &uncore_constraint_fixed;
 425
 426        if (type->constraints) {
 427                for_each_event_constraint(c, type->constraints) {
 428                        if ((event->hw.config & c->cmask) == c->code)
 429                                return c;
 430                }
 431        }
 432
 433        return &type->unconstrainted;
 434}
 435
 436static void uncore_put_event_constraint(struct intel_uncore_box *box,
 437                                        struct perf_event *event)
 438{
 439        if (box->pmu->type->ops->put_constraint)
 440                box->pmu->type->ops->put_constraint(box, event);
 441}
 442
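/*
 * Schedule @n collected events onto counters.  The fast path keeps an event
 * on the counter it already occupies when its constraint still allows that;
 * otherwise perf_assign_events() solves the general assignment problem.
 * With @assign == NULL this only validates schedulability, and any
 * constraints taken are released again on failure or validation-only runs.
 */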
 443static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 444{
 445        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 446        struct event_constraint *c;
 447        int i, wmin, wmax, ret = 0;
 448        struct hw_perf_event *hwc;
 449
 450        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 451
 452        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 453                c = uncore_get_event_constraint(box, box->event_list[i]);
 454                box->event_constraint[i] = c;
 455                wmin = min(wmin, c->weight);
 456                wmax = max(wmax, c->weight);
 457        }
 458
 459        /* fastpath, try to reuse previous register */
 460        for (i = 0; i < n; i++) {
 461                hwc = &box->event_list[i]->hw;
 462                c = box->event_constraint[i];
 463
 464                /* never assigned */
 465                if (hwc->idx == -1)
 466                        break;
 467
 468                /* constraint still honored */
 469                if (!test_bit(hwc->idx, c->idxmsk))
 470                        break;
 471
 472                /* not already used */
 473                if (test_bit(hwc->idx, used_mask))
 474                        break;
 475
 476                __set_bit(hwc->idx, used_mask);
 477                if (assign)
 478                        assign[i] = hwc->idx;
 479        }
 480        /* slow path */
 481        if (i != n)
 482                ret = perf_assign_events(box->event_constraint, n,
 483                                         wmin, wmax, n, assign);
 484
 485        if (!assign || ret) {
 486                for (i = 0; i < n; i++)
 487                        uncore_put_event_constraint(box, box->event_list[i]);
 488        }
 489        return ret ? -EINVAL : 0;
 490}
 491
 492void uncore_pmu_event_start(struct perf_event *event, int flags)
 493{
 494        struct intel_uncore_box *box = uncore_event_to_box(event);
 495        int idx = event->hw.idx;
 496
 497        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
 498                return;
 499
 500        /*
  501         * A free running counter is read-only and always active.
  502         * Use the current counter value as the start point.
  503         * There is no overflow interrupt for a free running counter,
  504         * so use the hrtimer to periodically poll it and avoid overflow.
 505         */
 506        if (uncore_pmc_freerunning(event->hw.idx)) {
 507                list_add_tail(&event->active_entry, &box->active_list);
 508                local64_set(&event->hw.prev_count,
 509                            uncore_read_counter(box, event));
 510                if (box->n_active++ == 0)
 511                        uncore_pmu_start_hrtimer(box);
 512                return;
 513        }
 514
 515        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 516                return;
 517
 518        event->hw.state = 0;
 519        box->events[idx] = event;
 520        box->n_active++;
 521        __set_bit(idx, box->active_mask);
 522
 523        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
 524        uncore_enable_event(box, event);
 525
 526        if (box->n_active == 1)
 527                uncore_pmu_start_hrtimer(box);
 528}
 529
 530void uncore_pmu_event_stop(struct perf_event *event, int flags)
 531{
 532        struct intel_uncore_box *box = uncore_event_to_box(event);
 533        struct hw_perf_event *hwc = &event->hw;
 534
 535        /* Cannot disable free running counter which is read-only */
 536        if (uncore_pmc_freerunning(hwc->idx)) {
 537                list_del(&event->active_entry);
 538                if (--box->n_active == 0)
 539                        uncore_pmu_cancel_hrtimer(box);
 540                uncore_perf_event_update(box, event);
 541                return;
 542        }
 543
 544        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
 545                uncore_disable_event(box, event);
 546                box->n_active--;
 547                box->events[hwc->idx] = NULL;
 548                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 549                hwc->state |= PERF_HES_STOPPED;
 550
 551                if (box->n_active == 0)
 552                        uncore_pmu_cancel_hrtimer(box);
 553        }
 554
 555        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 556                /*
  557                 * Drain the remaining delta count out of an event
  558                 * that we are disabling:
 559                 */
 560                uncore_perf_event_update(box, event);
 561                hwc->state |= PERF_HES_UPTODATE;
 562        }
 563}
 564
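/*
 * pmu::add callback.  Collect the new event into the box, re-run counter
 * assignment for all collected events, stop the events whose counter
 * changed and then (re)start everything that is not flagged to stay
 * stopped (PERF_HES_ARCH) on its assigned counter.  Free running counters
 * take the early exit since they were already assigned at event_init().
 */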
 565int uncore_pmu_event_add(struct perf_event *event, int flags)
 566{
 567        struct intel_uncore_box *box = uncore_event_to_box(event);
 568        struct hw_perf_event *hwc = &event->hw;
 569        int assign[UNCORE_PMC_IDX_MAX];
 570        int i, n, ret;
 571
 572        if (!box)
 573                return -ENODEV;
 574
 575        /*
  576         * The free running counter is assigned in event_init().
 577         * The free running counter event and free running counter
 578         * are 1:1 mapped. It doesn't need to be tracked in event_list.
 579         */
 580        if (uncore_pmc_freerunning(hwc->idx)) {
 581                if (flags & PERF_EF_START)
 582                        uncore_pmu_event_start(event, 0);
 583                return 0;
 584        }
 585
 586        ret = n = uncore_collect_events(box, event, false);
 587        if (ret < 0)
 588                return ret;
 589
 590        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 591        if (!(flags & PERF_EF_START))
 592                hwc->state |= PERF_HES_ARCH;
 593
 594        ret = uncore_assign_events(box, assign, n);
 595        if (ret)
 596                return ret;
 597
 598        /* save events moving to new counters */
 599        for (i = 0; i < box->n_events; i++) {
 600                event = box->event_list[i];
 601                hwc = &event->hw;
 602
 603                if (hwc->idx == assign[i] &&
 604                        hwc->last_tag == box->tags[assign[i]])
 605                        continue;
 606                /*
 607                 * Ensure we don't accidentally enable a stopped
 608                 * counter simply because we rescheduled.
 609                 */
 610                if (hwc->state & PERF_HES_STOPPED)
 611                        hwc->state |= PERF_HES_ARCH;
 612
 613                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 614        }
 615
 616        /* reprogram moved events into new counters */
 617        for (i = 0; i < n; i++) {
 618                event = box->event_list[i];
 619                hwc = &event->hw;
 620
 621                if (hwc->idx != assign[i] ||
 622                        hwc->last_tag != box->tags[assign[i]])
 623                        uncore_assign_hw_event(box, event, assign[i]);
 624                else if (i < box->n_events)
 625                        continue;
 626
 627                if (hwc->state & PERF_HES_ARCH)
 628                        continue;
 629
 630                uncore_pmu_event_start(event, 0);
 631        }
 632        box->n_events = n;
 633
 634        return 0;
 635}
 636
 637void uncore_pmu_event_del(struct perf_event *event, int flags)
 638{
 639        struct intel_uncore_box *box = uncore_event_to_box(event);
 640        int i;
 641
 642        uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 643
 644        /*
  645         * The event for a free running counter is not tracked by event_list.
  646         * There is no need to force event->hw.idx = -1 to reassign the counter,
  647         * because the event and the free running counter are 1:1 mapped.
 648         */
 649        if (uncore_pmc_freerunning(event->hw.idx))
 650                return;
 651
 652        for (i = 0; i < box->n_events; i++) {
 653                if (event == box->event_list[i]) {
 654                        uncore_put_event_constraint(box, event);
 655
 656                        for (++i; i < box->n_events; i++)
 657                                box->event_list[i - 1] = box->event_list[i];
 658
 659                        --box->n_events;
 660                        break;
 661                }
 662        }
 663
 664        event->hw.idx = -1;
 665        event->hw.last_tag = ~0ULL;
 666}
 667
 668void uncore_pmu_event_read(struct perf_event *event)
 669{
 670        struct intel_uncore_box *box = uncore_event_to_box(event);
 671        uncore_perf_event_update(box, event);
 672}
 673
 674/*
 675 * validation ensures the group can be loaded onto the
 676 * PMU if it was the only group available.
 677 */
 678static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 679                                struct perf_event *event)
 680{
 681        struct perf_event *leader = event->group_leader;
 682        struct intel_uncore_box *fake_box;
 683        int ret = -EINVAL, n;
 684
 685        /* The free running counter is always active. */
 686        if (uncore_pmc_freerunning(event->hw.idx))
 687                return 0;
 688
 689        fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
 690        if (!fake_box)
 691                return -ENOMEM;
 692
 693        fake_box->pmu = pmu;
 694        /*
  695         * The event is not yet connected with its
  696         * siblings, therefore we must first collect
  697         * existing siblings, then add the new event
  698         * before we can simulate the scheduling.
 699         */
 700        n = uncore_collect_events(fake_box, leader, true);
 701        if (n < 0)
 702                goto out;
 703
 704        fake_box->n_events = n;
 705        n = uncore_collect_events(fake_box, event, false);
 706        if (n < 0)
 707                goto out;
 708
 709        fake_box->n_events = n;
 710
 711        ret = uncore_assign_events(fake_box, NULL, n);
 712out:
 713        kfree(fake_box);
 714        return ret;
 715}
 716
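/*
 * pmu::event_init callback.  Uncore counters are per die rather than per
 * task, so sampling and per-task events are rejected and event->cpu is
 * redirected to the CPU that owns the box.  The config is then validated
 * for fixed, free running and normal counters respectively.
 */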
 717static int uncore_pmu_event_init(struct perf_event *event)
 718{
 719        struct intel_uncore_pmu *pmu;
 720        struct intel_uncore_box *box;
 721        struct hw_perf_event *hwc = &event->hw;
 722        int ret;
 723
 724        if (event->attr.type != event->pmu->type)
 725                return -ENOENT;
 726
 727        pmu = uncore_event_to_pmu(event);
 728        /* no device found for this pmu */
 729        if (pmu->func_id < 0)
 730                return -ENOENT;
 731
 732        /* Sampling not supported yet */
 733        if (hwc->sample_period)
 734                return -EINVAL;
 735
 736        /*
 737         * Place all uncore events for a particular physical package
 738         * onto a single cpu
 739         */
 740        if (event->cpu < 0)
 741                return -EINVAL;
 742        box = uncore_pmu_to_box(pmu, event->cpu);
 743        if (!box || box->cpu < 0)
 744                return -EINVAL;
 745        event->cpu = box->cpu;
 746        event->pmu_private = box;
 747
 748        event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
 749
 750        event->hw.idx = -1;
 751        event->hw.last_tag = ~0ULL;
 752        event->hw.extra_reg.idx = EXTRA_REG_NONE;
 753        event->hw.branch_reg.idx = EXTRA_REG_NONE;
 754
 755        if (event->attr.config == UNCORE_FIXED_EVENT) {
 756                /* no fixed counter */
 757                if (!pmu->type->fixed_ctl)
 758                        return -EINVAL;
 759                /*
 760                 * if there is only one fixed counter, only the first pmu
 761                 * can access the fixed counter
 762                 */
 763                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
 764                        return -EINVAL;
 765
 766                /* fixed counters have event field hardcoded to zero */
 767                hwc->config = 0ULL;
 768        } else if (is_freerunning_event(event)) {
 769                hwc->config = event->attr.config;
 770                if (!check_valid_freerunning_event(box, event))
 771                        return -EINVAL;
 772                event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
 773                /*
 774                 * The free running counter event and free running counter
 775                 * are always 1:1 mapped.
 776                 * The free running counter is always active.
 777                 * Assign the free running counter here.
 778                 */
 779                event->hw.event_base = uncore_freerunning_counter(box, event);
 780        } else {
 781                hwc->config = event->attr.config &
 782                              (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
 783                if (pmu->type->ops->hw_config) {
 784                        ret = pmu->type->ops->hw_config(box, event);
 785                        if (ret)
 786                                return ret;
 787                }
 788        }
 789
 790        if (event->group_leader != event)
 791                ret = uncore_validate_group(pmu, event);
 792        else
 793                ret = 0;
 794
 795        return ret;
 796}
 797
 798static void uncore_pmu_enable(struct pmu *pmu)
 799{
 800        struct intel_uncore_pmu *uncore_pmu;
 801        struct intel_uncore_box *box;
 802
 803        uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 804
 805        box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 806        if (!box)
 807                return;
 808
 809        if (uncore_pmu->type->ops->enable_box)
 810                uncore_pmu->type->ops->enable_box(box);
 811}
 812
 813static void uncore_pmu_disable(struct pmu *pmu)
 814{
 815        struct intel_uncore_pmu *uncore_pmu;
 816        struct intel_uncore_box *box;
 817
 818        uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 819
 820        box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 821        if (!box)
 822                return;
 823
 824        if (uncore_pmu->type->ops->disable_box)
 825                uncore_pmu->type->ops->disable_box(box);
 826}
 827
 828static ssize_t uncore_get_attr_cpumask(struct device *dev,
 829                                struct device_attribute *attr, char *buf)
 830{
 831        return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
 832}
 833
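/*
 * "cpumask" sysfs attribute.  Userspace (e.g. the perf tool) reads
 * /sys/bus/event_source/devices/uncore_*/cpumask to learn which single CPU
 * per die uncore events should be opened on.
 */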
 834static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
 835
 836static struct attribute *uncore_pmu_attrs[] = {
 837        &dev_attr_cpumask.attr,
 838        NULL,
 839};
 840
 841static const struct attribute_group uncore_pmu_attr_group = {
 842        .attrs = uncore_pmu_attrs,
 843};
 844
 845static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
 846{
 847        struct intel_uncore_type *type = pmu->type;
 848
 849        /*
  850         * There is no uncore block name in the discovery table.
  851         * Use uncore_type_<typeid>_<boxid> as the name.
 852         */
 853        if (!type->name) {
 854                if (type->num_boxes == 1)
 855                        sprintf(pmu->name, "uncore_type_%u", type->type_id);
 856                else {
 857                        sprintf(pmu->name, "uncore_type_%u_%d",
 858                                type->type_id, type->box_ids[pmu->pmu_idx]);
 859                }
 860                return;
 861        }
 862
 863        if (type->num_boxes == 1) {
 864                if (strlen(type->name) > 0)
 865                        sprintf(pmu->name, "uncore_%s", type->name);
 866                else
 867                        sprintf(pmu->name, "uncore");
 868        } else
 869                sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx);
 870
 871}
 872
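/*
 * Register one logical uncore PMU with the perf core.  Unless the uncore
 * type supplies its own struct pmu template, the generic callbacks defined
 * above are used.
 */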
 873static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 874{
 875        int ret;
 876
 877        if (!pmu->type->pmu) {
 878                pmu->pmu = (struct pmu) {
 879                        .attr_groups    = pmu->type->attr_groups,
 880                        .task_ctx_nr    = perf_invalid_context,
 881                        .pmu_enable     = uncore_pmu_enable,
 882                        .pmu_disable    = uncore_pmu_disable,
 883                        .event_init     = uncore_pmu_event_init,
 884                        .add            = uncore_pmu_event_add,
 885                        .del            = uncore_pmu_event_del,
 886                        .start          = uncore_pmu_event_start,
 887                        .stop           = uncore_pmu_event_stop,
 888                        .read           = uncore_pmu_event_read,
 889                        .module         = THIS_MODULE,
 890                        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
 891                        .attr_update    = pmu->type->attr_update,
 892                };
 893        } else {
 894                pmu->pmu = *pmu->type->pmu;
 895                pmu->pmu.attr_groups = pmu->type->attr_groups;
 896                pmu->pmu.attr_update = pmu->type->attr_update;
 897        }
 898
 899        uncore_get_pmu_name(pmu);
 900
 901        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
 902        if (!ret)
 903                pmu->registered = true;
 904        return ret;
 905}
 906
 907static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 908{
 909        if (!pmu->registered)
 910                return;
 911        perf_pmu_unregister(&pmu->pmu);
 912        pmu->registered = false;
 913}
 914
 915static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 916{
 917        int die;
 918
 919        for (die = 0; die < uncore_max_dies(); die++)
 920                kfree(pmu->boxes[die]);
 921        kfree(pmu->boxes);
 922}
 923
 924static void uncore_type_exit(struct intel_uncore_type *type)
 925{
 926        struct intel_uncore_pmu *pmu = type->pmus;
 927        int i;
 928
 929        if (type->cleanup_mapping)
 930                type->cleanup_mapping(type);
 931
 932        if (pmu) {
 933                for (i = 0; i < type->num_boxes; i++, pmu++) {
 934                        uncore_pmu_unregister(pmu);
 935                        uncore_free_boxes(pmu);
 936                }
 937                kfree(type->pmus);
 938                type->pmus = NULL;
 939        }
 940        if (type->box_ids) {
 941                kfree(type->box_ids);
 942                type->box_ids = NULL;
 943        }
 944        kfree(type->events_group);
 945        type->events_group = NULL;
 946}
 947
 948static void uncore_types_exit(struct intel_uncore_type **types)
 949{
 950        for (; *types; types++)
 951                uncore_type_exit(*types);
 952}
 953
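/*
 * Allocate one intel_uncore_pmu per box of the type, each with a per-die
 * array of box pointers.  If the type provides event descriptors, they are
 * also wrapped into an "events" sysfs attribute group here.
 */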
 954static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 955{
 956        struct intel_uncore_pmu *pmus;
 957        size_t size;
 958        int i, j;
 959
 960        pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
 961        if (!pmus)
 962                return -ENOMEM;
 963
 964        size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
 965
 966        for (i = 0; i < type->num_boxes; i++) {
 967                pmus[i].func_id = setid ? i : -1;
 968                pmus[i].pmu_idx = i;
 969                pmus[i].type    = type;
 970                pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
 971                if (!pmus[i].boxes)
 972                        goto err;
 973        }
 974
 975        type->pmus = pmus;
 976        type->unconstrainted = (struct event_constraint)
 977                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 978                                0, type->num_counters, 0, 0);
 979
 980        if (type->event_descs) {
 981                struct {
 982                        struct attribute_group group;
 983                        struct attribute *attrs[];
 984                } *attr_group;
 985                for (i = 0; type->event_descs[i].attr.attr.name; i++);
 986
 987                attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
 988                                                                GFP_KERNEL);
 989                if (!attr_group)
 990                        goto err;
 991
 992                attr_group->group.name = "events";
 993                attr_group->group.attrs = attr_group->attrs;
 994
 995                for (j = 0; j < i; j++)
 996                        attr_group->attrs[j] = &type->event_descs[j].attr.attr;
 997
 998                type->events_group = &attr_group->group;
 999        }
1000
1001        type->pmu_group = &uncore_pmu_attr_group;
1002
1003        if (type->set_mapping)
1004                type->set_mapping(type);
1005
1006        return 0;
1007
1008err:
1009        for (i = 0; i < type->num_boxes; i++)
1010                kfree(pmus[i].boxes);
1011        kfree(pmus);
1012
1013        return -ENOMEM;
1014}
1015
1016static int __init
1017uncore_types_init(struct intel_uncore_type **types, bool setid)
1018{
1019        int ret;
1020
1021        for (; *types; types++) {
1022                ret = uncore_type_init(*types, setid);
1023                if (ret)
1024                        return ret;
1025        }
1026        return 0;
1027}
1028
1029/*
1030 * Get the die information of a PCI device.
1031 * @pdev: The PCI device.
1032 * @die: The die id which the device maps to.
1033 */
1034static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1035{
1036        *die = uncore_pcibus_to_dieid(pdev->bus);
1037        if (*die < 0)
1038                return -EINVAL;
1039
1040        return 0;
1041}
1042
1043static struct intel_uncore_pmu *
1044uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1045{
1046        struct intel_uncore_type **types = uncore_pci_uncores;
1047        struct intel_uncore_type *type;
1048        u64 box_ctl;
1049        int i, die;
1050
1051        for (; *types; types++) {
1052                type = *types;
1053                for (die = 0; die < __uncore_max_dies; die++) {
1054                        for (i = 0; i < type->num_boxes; i++) {
1055                                if (!type->box_ctls[die])
1056                                        continue;
1057                                box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1058                                if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1059                                    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1060                                    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1061                                        return &type->pmus[i];
1062                        }
1063                }
1064        }
1065
1066        return NULL;
1067}
1068
1069/*
1070 * Find the PMU of a PCI device.
1071 * @pdev: The PCI device.
1072 * @ids: The ID table of the available PCI devices with a PMU.
1073 *       If NULL, search the whole uncore_pci_uncores.
1074 */
1075static struct intel_uncore_pmu *
1076uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1077{
1078        struct intel_uncore_pmu *pmu = NULL;
1079        struct intel_uncore_type *type;
1080        kernel_ulong_t data;
1081        unsigned int devfn;
1082
1083        if (!ids)
1084                return uncore_pci_find_dev_pmu_from_types(pdev);
1085
1086        while (ids && ids->vendor) {
1087                if ((ids->vendor == pdev->vendor) &&
1088                    (ids->device == pdev->device)) {
1089                        data = ids->driver_data;
1090                        devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1091                                          UNCORE_PCI_DEV_FUNC(data));
1092                        if (devfn == pdev->devfn) {
1093                                type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1094                                pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1095                                break;
1096                        }
1097                }
1098                ids++;
1099        }
1100        return pmu;
1101}
1102
1103/*
1104 * Register the PMU for a PCI device
1105 * @pdev: The PCI device.
1106 * @type: The corresponding PMU type of the device.
1107 * @pmu: The corresponding PMU of the device.
1108 * @die: The die id which the device maps to.
1109 */
1110static int uncore_pci_pmu_register(struct pci_dev *pdev,
1111                                   struct intel_uncore_type *type,
1112                                   struct intel_uncore_pmu *pmu,
1113                                   int die)
1114{
1115        struct intel_uncore_box *box;
1116        int ret;
1117
1118        if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1119                return -EINVAL;
1120
1121        box = uncore_alloc_box(type, NUMA_NO_NODE);
1122        if (!box)
1123                return -ENOMEM;
1124
1125        if (pmu->func_id < 0)
1126                pmu->func_id = pdev->devfn;
1127        else
1128                WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1129
1130        atomic_inc(&box->refcnt);
1131        box->dieid = die;
1132        box->pci_dev = pdev;
1133        box->pmu = pmu;
1134        uncore_box_init(box);
1135
1136        pmu->boxes[die] = box;
1137        if (atomic_inc_return(&pmu->activeboxes) > 1)
1138                return 0;
1139
1140        /* First active box registers the pmu */
1141        ret = uncore_pmu_register(pmu);
1142        if (ret) {
1143                pmu->boxes[die] = NULL;
1144                uncore_box_exit(box);
1145                kfree(box);
1146        }
1147        return ret;
1148}
1149
1150/*
1151 * add a pci uncore device
1152 */
1153static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1154{
1155        struct intel_uncore_type *type;
1156        struct intel_uncore_pmu *pmu = NULL;
1157        int die, ret;
1158
1159        ret = uncore_pci_get_dev_die_info(pdev, &die);
1160        if (ret)
1161                return ret;
1162
1163        if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1164                int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1165
1166                uncore_extra_pci_dev[die].dev[idx] = pdev;
1167                pci_set_drvdata(pdev, NULL);
1168                return 0;
1169        }
1170
1171        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1172
1173        /*
1174         * Some platforms, e.g. Knights Landing, use a common PCI device ID
1175         * for multiple instances of an uncore PMU device type.  Check the
1176         * PCI slot and func to identify the uncore box.
1177         */
1178        if (id->driver_data & ~0xffff) {
1179                struct pci_driver *pci_drv = pdev->driver;
1180
1181                pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1182                if (pmu == NULL)
1183                        return -ENODEV;
1184        } else {
1185                /*
1186                 * For a performance monitoring unit with multiple boxes,
1187                 * each box has a different function id.
1188                 */
1189                pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1190        }
1191
1192        ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1193
1194        pci_set_drvdata(pdev, pmu->boxes[die]);
1195
1196        return ret;
1197}
1198
1199/*
1200 * Unregister the PMU of a PCI device
1201 * @pmu: The corresponding PMU is unregistered.
1202 * @die: The die id which the device maps to.
1203 */
1204static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1205{
1206        struct intel_uncore_box *box = pmu->boxes[die];
1207
1208        pmu->boxes[die] = NULL;
1209        if (atomic_dec_return(&pmu->activeboxes) == 0)
1210                uncore_pmu_unregister(pmu);
1211        uncore_box_exit(box);
1212        kfree(box);
1213}
1214
1215static void uncore_pci_remove(struct pci_dev *pdev)
1216{
1217        struct intel_uncore_box *box;
1218        struct intel_uncore_pmu *pmu;
1219        int i, die;
1220
1221        if (uncore_pci_get_dev_die_info(pdev, &die))
1222                return;
1223
1224        box = pci_get_drvdata(pdev);
1225        if (!box) {
1226                for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1227                        if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1228                                uncore_extra_pci_dev[die].dev[i] = NULL;
1229                                break;
1230                        }
1231                }
1232                WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1233                return;
1234        }
1235
1236        pmu = box->pmu;
1237
1238        pci_set_drvdata(pdev, NULL);
1239
1240        uncore_pci_pmu_unregister(pmu, die);
1241}
1242
1243static int uncore_bus_notify(struct notifier_block *nb,
1244                             unsigned long action, void *data,
1245                             const struct pci_device_id *ids)
1246{
1247        struct device *dev = data;
1248        struct pci_dev *pdev = to_pci_dev(dev);
1249        struct intel_uncore_pmu *pmu;
1250        int die;
1251
1252        /* Unregister the PMU when the device is going to be deleted. */
1253        if (action != BUS_NOTIFY_DEL_DEVICE)
1254                return NOTIFY_DONE;
1255
1256        pmu = uncore_pci_find_dev_pmu(pdev, ids);
1257        if (!pmu)
1258                return NOTIFY_DONE;
1259
1260        if (uncore_pci_get_dev_die_info(pdev, &die))
1261                return NOTIFY_DONE;
1262
1263        uncore_pci_pmu_unregister(pmu, die);
1264
1265        return NOTIFY_OK;
1266}
1267
1268static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1269                                     unsigned long action, void *data)
1270{
1271        return uncore_bus_notify(nb, action, data,
1272                                 uncore_pci_sub_driver->id_table);
1273}
1274
1275static struct notifier_block uncore_pci_sub_notifier = {
1276        .notifier_call = uncore_pci_sub_bus_notify,
1277};
1278
1279static void uncore_pci_sub_driver_init(void)
1280{
1281        const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1282        struct intel_uncore_type *type;
1283        struct intel_uncore_pmu *pmu;
1284        struct pci_dev *pci_sub_dev;
1285        bool notify = false;
1286        unsigned int devfn;
1287        int die;
1288
1289        while (ids && ids->vendor) {
1290                pci_sub_dev = NULL;
1291                type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1292                /*
1293                 * Search for the available device, and register the
1294                 * corresponding PMU.
1295                 */
1296                while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1297                                                     ids->device, pci_sub_dev))) {
1298                        devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1299                                          UNCORE_PCI_DEV_FUNC(ids->driver_data));
1300                        if (devfn != pci_sub_dev->devfn)
1301                                continue;
1302
1303                        pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1304                        if (!pmu)
1305                                continue;
1306
1307                        if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1308                                continue;
1309
1310                        if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1311                                                     die))
1312                                notify = true;
1313                }
1314                ids++;
1315        }
1316
1317        if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1318                notify = false;
1319
1320        if (!notify)
1321                uncore_pci_sub_driver = NULL;
1322}
1323
1324static int uncore_pci_bus_notify(struct notifier_block *nb,
1325                                     unsigned long action, void *data)
1326{
1327        return uncore_bus_notify(nb, action, data, NULL);
1328}
1329
1330static struct notifier_block uncore_pci_notifier = {
1331        .notifier_call = uncore_pci_bus_notify,
1332};
1333
1334
1335static void uncore_pci_pmus_register(void)
1336{
1337        struct intel_uncore_type **types = uncore_pci_uncores;
1338        struct intel_uncore_type *type;
1339        struct intel_uncore_pmu *pmu;
1340        struct pci_dev *pdev;
1341        u64 box_ctl;
1342        int i, die;
1343
1344        for (; *types; types++) {
1345                type = *types;
1346                for (die = 0; die < __uncore_max_dies; die++) {
1347                        for (i = 0; i < type->num_boxes; i++) {
1348                                if (!type->box_ctls[die])
1349                                        continue;
1350                                box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1351                                pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1352                                                                   UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1353                                                                   UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1354                                if (!pdev)
1355                                        continue;
1356                                pmu = &type->pmus[i];
1357
1358                                uncore_pci_pmu_register(pdev, type, pmu, die);
1359                        }
1360                }
1361        }
1362
1363        bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1364}
1365
1366static int __init uncore_pci_init(void)
1367{
1368        size_t size;
1369        int ret;
1370
1371        size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1372        uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1373        if (!uncore_extra_pci_dev) {
1374                ret = -ENOMEM;
1375                goto err;
1376        }
1377
1378        ret = uncore_types_init(uncore_pci_uncores, false);
1379        if (ret)
1380                goto errtype;
1381
1382        if (uncore_pci_driver) {
1383                uncore_pci_driver->probe = uncore_pci_probe;
1384                uncore_pci_driver->remove = uncore_pci_remove;
1385
1386                ret = pci_register_driver(uncore_pci_driver);
1387                if (ret)
1388                        goto errtype;
1389        } else
1390                uncore_pci_pmus_register();
1391
1392        if (uncore_pci_sub_driver)
1393                uncore_pci_sub_driver_init();
1394
1395        pcidrv_registered = true;
1396        return 0;
1397
1398errtype:
1399        uncore_types_exit(uncore_pci_uncores);
1400        kfree(uncore_extra_pci_dev);
1401        uncore_extra_pci_dev = NULL;
1402        uncore_free_pcibus_map();
1403err:
1404        uncore_pci_uncores = empty_uncore;
1405        return ret;
1406}
1407
1408static void uncore_pci_exit(void)
1409{
1410        if (pcidrv_registered) {
1411                pcidrv_registered = false;
1412                if (uncore_pci_sub_driver)
1413                        bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1414                if (uncore_pci_driver)
1415                        pci_unregister_driver(uncore_pci_driver);
1416                else
1417                        bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1418                uncore_types_exit(uncore_pci_uncores);
1419                kfree(uncore_extra_pci_dev);
1420                uncore_free_pcibus_map();
1421        }
1422}
1423
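/*
 * Move the boxes of one uncore type on @old_cpu's die over to @new_cpu.
 * A negative @old_cpu means the die gets its first owner; a negative
 * @new_cpu means the die loses its owner.  Active perf contexts are
 * migrated along with the box.
 */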
1424static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1425                                   int new_cpu)
1426{
1427        struct intel_uncore_pmu *pmu = type->pmus;
1428        struct intel_uncore_box *box;
1429        int i, die;
1430
1431        die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1432        for (i = 0; i < type->num_boxes; i++, pmu++) {
1433                box = pmu->boxes[die];
1434                if (!box)
1435                        continue;
1436
1437                if (old_cpu < 0) {
1438                        WARN_ON_ONCE(box->cpu != -1);
1439                        box->cpu = new_cpu;
1440                        continue;
1441                }
1442
1443                WARN_ON_ONCE(box->cpu != old_cpu);
1444                box->cpu = -1;
1445                if (new_cpu < 0)
1446                        continue;
1447
1448                uncore_pmu_cancel_hrtimer(box);
1449                perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1450                box->cpu = new_cpu;
1451        }
1452}
1453
1454static void uncore_change_context(struct intel_uncore_type **uncores,
1455                                  int old_cpu, int new_cpu)
1456{
1457        for (; *uncores; uncores++)
1458                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1459}
1460
1461static void uncore_box_unref(struct intel_uncore_type **types, int id)
1462{
1463        struct intel_uncore_type *type;
1464        struct intel_uncore_pmu *pmu;
1465        struct intel_uncore_box *box;
1466        int i;
1467
1468        for (; *types; types++) {
1469                type = *types;
1470                pmu = type->pmus;
1471                for (i = 0; i < type->num_boxes; i++, pmu++) {
1472                        box = pmu->boxes[id];
1473                        if (box && atomic_dec_return(&box->refcnt) == 0)
1474                                uncore_box_exit(box);
1475                }
1476        }
1477}
1478
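/*
 * CPU hotplug offline callback.  Exactly one online CPU per die collects
 * uncore events (tracked in uncore_cpu_mask); when that CPU goes offline,
 * the duty and the active perf contexts are migrated to another CPU on the
 * same die, and the die's box references are dropped.
 */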
1479static int uncore_event_cpu_offline(unsigned int cpu)
1480{
1481        int die, target;
1482
1483        /* Check if exiting cpu is used for collecting uncore events */
1484        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1485                goto unref;
1486        /* Find a new cpu to collect uncore events */
1487        target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1488
1489        /* Migrate uncore events to the new target */
1490        if (target < nr_cpu_ids)
1491                cpumask_set_cpu(target, &uncore_cpu_mask);
1492        else
1493                target = -1;
1494
1495        uncore_change_context(uncore_msr_uncores, cpu, target);
1496        uncore_change_context(uncore_mmio_uncores, cpu, target);
1497        uncore_change_context(uncore_pci_uncores, cpu, target);
1498
1499unref:
1500        /* Clear the references */
1501        die = topology_logical_die_id(cpu);
1502        uncore_box_unref(uncore_msr_uncores, die);
1503        uncore_box_unref(uncore_mmio_uncores, die);
1504        return 0;
1505}
1506
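/*
 * Allocate the boxes that are still missing for @die.  New boxes are staged
 * on a local list first so that a mid-way allocation failure can be unwound
 * without publishing partially initialized state.
 */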
1507static int allocate_boxes(struct intel_uncore_type **types,
1508                         unsigned int die, unsigned int cpu)
1509{
1510        struct intel_uncore_box *box, *tmp;
1511        struct intel_uncore_type *type;
1512        struct intel_uncore_pmu *pmu;
1513        LIST_HEAD(allocated);
1514        int i;
1515
1516        /* Try to allocate all required boxes */
1517        for (; *types; types++) {
1518                type = *types;
1519                pmu = type->pmus;
1520                for (i = 0; i < type->num_boxes; i++, pmu++) {
1521                        if (pmu->boxes[die])
1522                                continue;
1523                        box = uncore_alloc_box(type, cpu_to_node(cpu));
1524                        if (!box)
1525                                goto cleanup;
1526                        box->pmu = pmu;
1527                        box->dieid = die;
1528                        list_add(&box->active_list, &allocated);
1529                }
1530        }
1531        /* Install them in the pmus */
1532        list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1533                list_del_init(&box->active_list);
1534                box->pmu->boxes[die] = box;
1535        }
1536        return 0;
1537
1538cleanup:
1539        list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1540                list_del_init(&box->active_list);
1541                kfree(box);
1542        }
1543        return -ENOMEM;
1544}
1545
1546static int uncore_box_ref(struct intel_uncore_type **types,
1547                          int id, unsigned int cpu)
1548{
1549        struct intel_uncore_type *type;
1550        struct intel_uncore_pmu *pmu;
1551        struct intel_uncore_box *box;
1552        int i, ret;
1553
1554        ret = allocate_boxes(types, id, cpu);
1555        if (ret)
1556                return ret;
1557
1558        for (; *types; types++) {
1559                type = *types;
1560                pmu = type->pmus;
1561                for (i = 0; i < type->num_boxes; i++, pmu++) {
1562                        box = pmu->boxes[id];
1563                        if (box && atomic_inc_return(&box->refcnt) == 1)
1564                                uncore_box_init(box);
1565                }
1566        }
1567        return 0;
1568}
1569
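/*
 * CPU hotplug online callback.  Take a reference on (and, on first use,
 * initialize) the MSR and MMIO boxes of the die, then claim event
 * collection duty for the die if no other online CPU holds it yet.
 */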
1570static int uncore_event_cpu_online(unsigned int cpu)
1571{
1572        int die, target, msr_ret, mmio_ret;
1573
1574        die = topology_logical_die_id(cpu);
1575        msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1576        mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1577        if (msr_ret && mmio_ret)
1578                return -ENOMEM;
1579
1580        /*
1581         * Check if there is an online cpu in the package
1582         * which collects uncore events already.
1583         */
1584        target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1585        if (target < nr_cpu_ids)
1586                return 0;
1587
1588        cpumask_set_cpu(cpu, &uncore_cpu_mask);
1589
1590        if (!msr_ret)
1591                uncore_change_context(uncore_msr_uncores, -1, cpu);
1592        if (!mmio_ret)
1593                uncore_change_context(uncore_mmio_uncores, -1, cpu);
1594        uncore_change_context(uncore_pci_uncores, -1, cpu);
1595        return 0;
1596}
1597
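    /* Register a perf PMU for each of the given uncore type's boxes. */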
1598static int __init type_pmu_register(struct intel_uncore_type *type)
1599{
1600        int i, ret;
1601
1602        for (i = 0; i < type->num_boxes; i++) {
1603                ret = uncore_pmu_register(&type->pmus[i]);
1604                if (ret)
1605                        return ret;
1606        }
1607        return 0;
1608}
1609
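    /* Register the perf PMUs of all MSR-based uncore types. */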
1610static int __init uncore_msr_pmus_register(void)
1611{
1612        struct intel_uncore_type **types = uncore_msr_uncores;
1613        int ret;
1614
1615        for (; *types; types++) {
1616                ret = type_pmu_register(*types);
1617                if (ret)
1618                        return ret;
1619        }
1620        return 0;
1621}
1622
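    /*
     * Initialize and register the MSR-based uncore PMUs. On failure the
     * types are torn down and uncore_msr_uncores falls back to the empty
     * list so that later iteration over it remains safe.
     */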
1623static int __init uncore_cpu_init(void)
1624{
1625        int ret;
1626
1627        ret = uncore_types_init(uncore_msr_uncores, true);
1628        if (ret)
1629                goto err;
1630
1631        ret = uncore_msr_pmus_register();
1632        if (ret)
1633                goto err;
1634        return 0;
1635err:
1636        uncore_types_exit(uncore_msr_uncores);
1637        uncore_msr_uncores = empty_uncore;
1638        return ret;
1639}
1640
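    /* Same as uncore_cpu_init(), but for the MMIO-based uncore PMUs. */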
1641static int __init uncore_mmio_init(void)
1642{
1643        struct intel_uncore_type **types = uncore_mmio_uncores;
1644        int ret;
1645
1646        ret = uncore_types_init(types, true);
1647        if (ret)
1648                goto err;
1649
1650        for (; *types; types++) {
1651                ret = type_pmu_register(*types);
1652                if (ret)
1653                        goto err;
1654        }
1655        return 0;
1656err:
1657        uncore_types_exit(uncore_mmio_uncores);
1658        uncore_mmio_uncores = empty_uncore;
1659        return ret;
1660}
1661
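    /*
     * Per-model setup callbacks: cpu_init, pci_init and mmio_init prepare
     * the MSR-, PCI- and MMIO-based uncore units respectively. Any of them
     * may be NULL when the model has no units of that kind.
     */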
1662struct intel_uncore_init_fun {
1663        void    (*cpu_init)(void);
1664        int     (*pci_init)(void);
1665        void    (*mmio_init)(void);
1666};
1667
1668static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1669        .cpu_init = nhm_uncore_cpu_init,
1670};
1671
1672static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1673        .cpu_init = snb_uncore_cpu_init,
1674        .pci_init = snb_uncore_pci_init,
1675};
1676
1677static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1678        .cpu_init = snb_uncore_cpu_init,
1679        .pci_init = ivb_uncore_pci_init,
1680};
1681
1682static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1683        .cpu_init = snb_uncore_cpu_init,
1684        .pci_init = hsw_uncore_pci_init,
1685};
1686
1687static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1688        .cpu_init = snb_uncore_cpu_init,
1689        .pci_init = bdw_uncore_pci_init,
1690};
1691
1692static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1693        .cpu_init = snbep_uncore_cpu_init,
1694        .pci_init = snbep_uncore_pci_init,
1695};
1696
1697static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1698        .cpu_init = nhmex_uncore_cpu_init,
1699};
1700
1701static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1702        .cpu_init = ivbep_uncore_cpu_init,
1703        .pci_init = ivbep_uncore_pci_init,
1704};
1705
1706static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1707        .cpu_init = hswep_uncore_cpu_init,
1708        .pci_init = hswep_uncore_pci_init,
1709};
1710
1711static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1712        .cpu_init = bdx_uncore_cpu_init,
1713        .pci_init = bdx_uncore_pci_init,
1714};
1715
1716static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1717        .cpu_init = knl_uncore_cpu_init,
1718        .pci_init = knl_uncore_pci_init,
1719};
1720
1721static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1722        .cpu_init = skl_uncore_cpu_init,
1723        .pci_init = skl_uncore_pci_init,
1724};
1725
1726static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1727        .cpu_init = skx_uncore_cpu_init,
1728        .pci_init = skx_uncore_pci_init,
1729};
1730
1731static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1732        .cpu_init = icl_uncore_cpu_init,
1733        .pci_init = skl_uncore_pci_init,
1734};
1735
1736static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1737        .cpu_init = tgl_uncore_cpu_init,
1738        .mmio_init = tgl_uncore_mmio_init,
1739};
1740
1741static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1742        .cpu_init = tgl_uncore_cpu_init,
1743        .mmio_init = tgl_l_uncore_mmio_init,
1744};
1745
1746static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1747        .cpu_init = tgl_uncore_cpu_init,
1748        .pci_init = skl_uncore_pci_init,
1749};
1750
1751static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1752        .cpu_init = adl_uncore_cpu_init,
1753        .mmio_init = tgl_uncore_mmio_init,
1754};
1755
1756static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1757        .cpu_init = icx_uncore_cpu_init,
1758        .pci_init = icx_uncore_pci_init,
1759        .mmio_init = icx_uncore_mmio_init,
1760};
1761
1762static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1763        .cpu_init = snr_uncore_cpu_init,
1764        .pci_init = snr_uncore_pci_init,
1765        .mmio_init = snr_uncore_mmio_init,
1766};
1767
1768static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1769        .cpu_init = intel_uncore_generic_uncore_cpu_init,
1770        .pci_init = intel_uncore_generic_uncore_pci_init,
1771        .mmio_init = intel_uncore_generic_uncore_mmio_init,
1772};
1773
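    /*
     * CPU model to init-callback table. A new model is supported by adding
     * an X86_MATCH_INTEL_FAM6_MODEL() entry pointing at its init functions;
     * models without an entry fall back to the discovery-based
     * generic_uncore_init when discovery tables are available.
     */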
1774static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1775        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1776        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1777        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1778        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1779        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1780        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1781        X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1782        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1783        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1784        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1785        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1786        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1787        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1788        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1789        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1790        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1791        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1792        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1793        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1794        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1795        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1796        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1797        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1798        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1799        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1800        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1801        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1802        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1803        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1804        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1805        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1806        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1807        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1808        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1809        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
1810        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
1811        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
1812        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1813        {},
1814};
1815MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1816
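    /*
     * Module init: pick the model-specific (or discovery-based generic)
     * init callbacks, bring up the PCI, MSR and MMIO uncore PMUs, and
     * install the CPU hotplug callbacks that manage the per-die event
     * collector CPU.
     */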
1817static int __init intel_uncore_init(void)
1818{
1819        const struct x86_cpu_id *id;
1820        struct intel_uncore_init_fun *uncore_init;
1821        int pret = 0, cret = 0, mret = 0, ret;
1822
1823        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1824                return -ENODEV;
1825
1826        __uncore_max_dies =
1827                topology_max_packages() * topology_max_die_per_package();
1828
1829        id = x86_match_cpu(intel_uncore_match);
1830        if (!id) {
1831                if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1832                        uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1833                else
1834                        return -ENODEV;
1835        } else
1836                uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1837
1838        if (uncore_init->pci_init) {
1839                pret = uncore_init->pci_init();
1840                if (!pret)
1841                        pret = uncore_pci_init();
1842        }
1843
1844        if (uncore_init->cpu_init) {
1845                uncore_init->cpu_init();
1846                cret = uncore_cpu_init();
1847        }
1848
1849        if (uncore_init->mmio_init) {
1850                uncore_init->mmio_init();
1851                mret = uncore_mmio_init();
1852        }
1853
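            /* Only bail out if MSR, PCI and MMIO setup all failed. */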
1854        if (cret && pret && mret) {
1855                ret = -ENODEV;
1856                goto free_discovery;
1857        }
1858
1859        /* Install hotplug callbacks to set up the collector target for each die */
1860        ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1861                                "perf/x86/intel/uncore:online",
1862                                uncore_event_cpu_online,
1863                                uncore_event_cpu_offline);
1864        if (ret)
1865                goto err;
1866        return 0;
1867
1868err:
1869        uncore_types_exit(uncore_msr_uncores);
1870        uncore_types_exit(uncore_mmio_uncores);
1871        uncore_pci_exit();
1872free_discovery:
1873        intel_uncore_clear_discovery_tables();
1874        return ret;
1875}
1876module_init(intel_uncore_init);
1877
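    /*
     * Module exit: remove the hotplug state, then tear down the MSR, MMIO
     * and PCI uncore PMUs and clear the discovery tables.
     */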
1878static void __exit intel_uncore_exit(void)
1879{
1880        cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1881        uncore_types_exit(uncore_msr_uncores);
1882        uncore_types_exit(uncore_mmio_uncores);
1883        uncore_pci_exit();
1884        intel_uncore_clear_discovery_tables();
1885}
1886module_exit(intel_uncore_exit);
1887