linux/arch/x86/oprofile/op_model_p4.c
<<
>>
Prefs
   1/**
   2 * @file op_model_p4.c
   3 * P4 model-specific MSR operations
   4 *
   5 * @remark Copyright 2002 OProfile authors
   6 * @remark Read the file COPYING
   7 *
   8 * @author Graydon Hoare
   9 */
  10
  11#include <linux/oprofile.h>
  12#include <linux/smp.h>
  13#include <linux/ptrace.h>
  14#include <linux/nmi.h>
  15#include <asm/msr.h>
  16#include <asm/fixmap.h>
  17#include <asm/apic.h>
  18
  19
  20#include "op_x86_model.h"
  21#include "op_counter.h"
  22
/* number of entries in the p4_events table */
#define NUM_EVENTS 39

/* register counts in effect unless setup_num_counters() detects two
   HT siblings; NUM_COUNTERS_NON_HT also sizes p4_counters and
   reset_value */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* register counts selected when smp_num_siblings == 2 */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* counts actually used by the loops in this file; they start at the
   non-HT values and are lowered by setup_num_counters() */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
  37
  38/* this has to be checked dynamically since the
  39   hyper-threadedness of a chip is discovered at
  40   kernel boot-time. */
  41static inline void setup_num_counters(void)
  42{
  43#ifdef CONFIG_SMP
  44        if (smp_num_siblings == 2) {
  45                num_counters = NUM_COUNTERS_HT2;
  46                num_controls = NUM_CONTROLS_HT2;
  47        }
  48#endif
  49}
  50
  51static int inline addr_increment(void)
  52{
  53#ifdef CONFIG_SMP
  54        return smp_num_siblings == 2 ? 2 : 1;
  55#else
  56        return 1;
  57#endif
  58}
  59
  60
/* tables to simulate simplified hardware view of p4 registers */

/* one hardware counter together with the CCCR that controls it */
struct p4_counter_binding {
        int virt_counter;    /* CTR_* bit identifying this counter */
        int counter_address; /* MSR address of the counter itself */
        int cccr_address;    /* MSR address of the counter's CCCR */
};
  67
/* describes how one event is programmed: which values go into the CCCR
   and ESCR, and which counter/ESCR pairings can count it */
struct p4_event_binding {
        int escr_select;  /* value to put in CCCR */
        int event_select; /* value to put in ESCR */
        /* up to two alternatives; an unused slot is { 0, 0 } */
        struct {
                int virt_counter; /* for this counter... */
                int escr_address; /* use this ESCR       */
        } bindings[2];
};
  76
  77/* nb: these CTR_* defines are a duplicate of defines in
  78   event/i386.p4*events. */
  79
  80
  81#define CTR_BPU_0      (1 << 0)
  82#define CTR_MS_0       (1 << 1)
  83#define CTR_FLAME_0    (1 << 2)
  84#define CTR_IQ_4       (1 << 3)
  85#define CTR_BPU_2      (1 << 4)
  86#define CTR_MS_2       (1 << 5)
  87#define CTR_FLAME_2    (1 << 6)
  88#define CTR_IQ_5       (1 << 7)
  89
  90static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
  91        { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
  92        { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
  93        { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
  94        { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
  95        { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
  96        { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
  97        { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
  98        { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
  99};
 100
 101#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
 102
 103/* p4 event codes in libop/op_event.h are indices into this table. */
 104
 105static struct p4_event_binding p4_events[NUM_EVENTS] = {
 106
 107        { /* BRANCH_RETIRED */
 108                0x05, 0x06,
 109                { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
 110                  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 111        },
 112
 113        { /* MISPRED_BRANCH_RETIRED */
 114                0x04, 0x03,
 115                { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 116                  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 117        },
 118
 119        { /* TC_DELIVER_MODE */
 120                0x01, 0x01,
 121                { { CTR_MS_0, MSR_P4_TC_ESCR0},
 122                  { CTR_MS_2, MSR_P4_TC_ESCR1} }
 123        },
 124
 125        { /* BPU_FETCH_REQUEST */
 126                0x00, 0x03,
 127                { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
 128                  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
 129        },
 130
 131        { /* ITLB_REFERENCE */
 132                0x03, 0x18,
 133                { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
 134                  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
 135        },
 136
 137        { /* MEMORY_CANCEL */
 138                0x05, 0x02,
 139                { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
 140                  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
 141        },
 142
 143        { /* MEMORY_COMPLETE */
 144                0x02, 0x08,
 145                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 146                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 147        },
 148
 149        { /* LOAD_PORT_REPLAY */
 150                0x02, 0x04,
 151                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 152                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 153        },
 154
 155        { /* STORE_PORT_REPLAY */
 156                0x02, 0x05,
 157                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 158                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 159        },
 160
 161        { /* MOB_LOAD_REPLAY */
 162                0x02, 0x03,
 163                { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
 164                  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
 165        },
 166
 167        { /* PAGE_WALK_TYPE */
 168                0x04, 0x01,
 169                { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
 170                  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
 171        },
 172
 173        { /* BSQ_CACHE_REFERENCE */
 174                0x07, 0x0c,
 175                { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 176                  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
 177        },
 178
 179        { /* IOQ_ALLOCATION */
 180                0x06, 0x03,
 181                { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 182                  { 0, 0 } }
 183        },
 184
 185        { /* IOQ_ACTIVE_ENTRIES */
 186                0x06, 0x1a,
 187                { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
 188                  { 0, 0 } }
 189        },
 190
 191        { /* FSB_DATA_ACTIVITY */
 192                0x06, 0x17,
 193                { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 194                  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 195        },
 196
 197        { /* BSQ_ALLOCATION */
 198                0x07, 0x05,
 199                { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 200                  { 0, 0 } }
 201        },
 202
 203        { /* BSQ_ACTIVE_ENTRIES */
 204                0x07, 0x06,
 205                { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
 206                  { 0, 0 } }
 207        },
 208
 209        { /* X87_ASSIST */
 210                0x05, 0x03,
 211                { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 212                  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 213        },
 214
 215        { /* SSE_INPUT_ASSIST */
 216                0x01, 0x34,
 217                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 218                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 219        },
 220
 221        { /* PACKED_SP_UOP */
 222                0x01, 0x08,
 223                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 224                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 225        },
 226
 227        { /* PACKED_DP_UOP */
 228                0x01, 0x0c,
 229                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 230                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 231        },
 232
 233        { /* SCALAR_SP_UOP */
 234                0x01, 0x0a,
 235                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 236                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 237        },
 238
 239        { /* SCALAR_DP_UOP */
 240                0x01, 0x0e,
 241                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 242                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 243        },
 244
 245        { /* 64BIT_MMX_UOP */
 246                0x01, 0x02,
 247                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 248                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 249        },
 250
 251        { /* 128BIT_MMX_UOP */
 252                0x01, 0x1a,
 253                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 254                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 255        },
 256
 257        { /* X87_FP_UOP */
 258                0x01, 0x04,
 259                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 260                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 261        },
 262
 263        { /* X87_SIMD_MOVES_UOP */
 264                0x01, 0x2e,
 265                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 266                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 267        },
 268
 269        { /* MACHINE_CLEAR */
 270                0x05, 0x02,
 271                { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 272                  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 273        },
 274
 275        { /* GLOBAL_POWER_EVENTS */
 276                0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
 277                { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 278                  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 279        },
 280
 281        { /* TC_MS_XFER */
 282                0x00, 0x05,
 283                { { CTR_MS_0, MSR_P4_MS_ESCR0},
 284                  { CTR_MS_2, MSR_P4_MS_ESCR1} }
 285        },
 286
 287        { /* UOP_QUEUE_WRITES */
 288                0x00, 0x09,
 289                { { CTR_MS_0, MSR_P4_MS_ESCR0},
 290                  { CTR_MS_2, MSR_P4_MS_ESCR1} }
 291        },
 292
 293        { /* FRONT_END_EVENT */
 294                0x05, 0x08,
 295                { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 296                  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 297        },
 298
 299        { /* EXECUTION_EVENT */
 300                0x05, 0x0c,
 301                { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 302                  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 303        },
 304
 305        { /* REPLAY_EVENT */
 306                0x05, 0x09,
 307                { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 308                  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 309        },
 310
 311        { /* INSTR_RETIRED */
 312                0x04, 0x02,
 313                { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 314                  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 315        },
 316
 317        { /* UOPS_RETIRED */
 318                0x04, 0x01,
 319                { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 320                  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 321        },
 322
 323        { /* UOP_TYPE */
 324                0x02, 0x02,
 325                { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
 326                  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
 327        },
 328
 329        { /* RETIRED_MISPRED_BRANCH_TYPE */
 330                0x02, 0x05,
 331                { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
 332                  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
 333        },
 334
 335        { /* RETIRED_BRANCH_TYPE */
 336                0x02, 0x04,
 337                { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
 338                  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
 339        }
 340};
 341
 342
 343#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
 344
 345#define ESCR_RESERVED_BITS 0x80000003
 346#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
 347#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
 348#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
 349#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
 350#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 351#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 352#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
 353#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 354#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 355
 356#define CCCR_RESERVED_BITS 0x38030FFF
 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
 358#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
 359#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
 360#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
 364#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 365#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 366#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 367#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 368
 369#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 370#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 371#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
 372#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 373#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 374
 375
 376/* this assigns a "stagger" to the current CPU, which is used throughout
 377   the code in this module as an extra array offset, to select the "even"
 378   or "odd" part of all the divided resources. */
 379static unsigned int get_stagger(void)
 380{
 381#ifdef CONFIG_SMP
 382        int cpu = smp_processor_id();
 383        return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
 384#endif
 385        return 0;
 386}
 387
 388
 389/* finally, mediate access to a real hardware counter
 390   by passing a "virtual" counter numer to this macro,
 391   along with your stagger setting. */
 392#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
 393
/* reload count per virtual counter, filled in by p4_setup_ctrs(); an
   entry of 0 marks a counter that p4_check_ctrs/p4_start/p4_stop skip */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
 395
 396
 397static void p4_fill_in_addresses(struct op_msrs * const msrs)
 398{
 399        unsigned int i;
 400        unsigned int addr, cccraddr, stag;
 401
 402        setup_num_counters();
 403        stag = get_stagger();
 404
 405        /* initialize some registers */
 406        for (i = 0; i < num_counters; ++i)
 407                msrs->counters[i].addr = 0;
 408        for (i = 0; i < num_controls; ++i)
 409                msrs->controls[i].addr = 0;
 410
 411        /* the counter & cccr registers we pay attention to */
 412        for (i = 0; i < num_counters; ++i) {
 413                addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
 414                cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
 415                if (reserve_perfctr_nmi(addr)) {
 416                        msrs->counters[i].addr = addr;
 417                        msrs->controls[i].addr = cccraddr;
 418                }
 419        }
 420
 421        /* 43 ESCR registers in three or four discontiguous group */
 422        for (addr = MSR_P4_BSU_ESCR0 + stag;
 423             addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
 424                if (reserve_evntsel_nmi(addr))
 425                        msrs->controls[i].addr = addr;
 426        }
 427
 428        /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
 429         * to avoid special case in nmi_{save|restore}_registers() */
 430        if (boot_cpu_data.x86_model >= 0x3) {
 431                for (addr = MSR_P4_BSU_ESCR0 + stag;
 432                     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
 433                        if (reserve_evntsel_nmi(addr))
 434                                msrs->controls[i].addr = addr;
 435                }
 436        } else {
 437                for (addr = MSR_P4_IQ_ESCR0 + stag;
 438                     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
 439                        if (reserve_evntsel_nmi(addr))
 440                                msrs->controls[i].addr = addr;
 441                }
 442        }
 443
 444        for (addr = MSR_P4_RAT_ESCR0 + stag;
 445             addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
 446                if (reserve_evntsel_nmi(addr))
 447                        msrs->controls[i].addr = addr;
 448        }
 449
 450        for (addr = MSR_P4_MS_ESCR0 + stag;
 451             addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
 452                if (reserve_evntsel_nmi(addr))
 453                        msrs->controls[i].addr = addr;
 454        }
 455
 456        for (addr = MSR_P4_IX_ESCR0 + stag;
 457             addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
 458                if (reserve_evntsel_nmi(addr))
 459                        msrs->controls[i].addr = addr;
 460        }
 461
 462        /* there are 2 remaining non-contiguously located ESCRs */
 463
 464        if (num_counters == NUM_COUNTERS_NON_HT) {
 465                /* standard non-HT CPUs handle both remaining ESCRs*/
 466                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
 467                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
 468                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
 469                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 470
 471        } else if (stag == 0) {
 472                /* HT CPUs give the first remainder to the even thread, as
 473                   the 32nd control register */
 474                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
 475                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 476
 477        } else {
 478                /* and two copies of the second to the odd thread,
 479                   for the 22st and 23nd control registers */
 480                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
 481                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
 482                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
 483                }
 484        }
 485}
 486
 487
 488static void pmc_setup_one_p4_counter(unsigned int ctr)
 489{
 490        int i;
 491        int const maxbind = 2;
 492        unsigned int cccr = 0;
 493        unsigned int escr = 0;
 494        unsigned int high = 0;
 495        unsigned int counter_bit;
 496        struct p4_event_binding *ev = NULL;
 497        unsigned int stag;
 498
 499        stag = get_stagger();
 500
 501        /* convert from counter *number* to counter *bit* */
 502        counter_bit = 1 << VIRT_CTR(stag, ctr);
 503
 504        /* find our event binding structure. */
 505        if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
 506                printk(KERN_ERR
 507                       "oprofile: P4 event code 0x%lx out of range\n",
 508                       counter_config[ctr].event);
 509                return;
 510        }
 511
 512        ev = &(p4_events[counter_config[ctr].event - 1]);
 513
 514        for (i = 0; i < maxbind; i++) {
 515                if (ev->bindings[i].virt_counter & counter_bit) {
 516
 517                        /* modify ESCR */
 518                        ESCR_READ(escr, high, ev, i);
 519                        ESCR_CLEAR(escr);
 520                        if (stag == 0) {
 521                                ESCR_SET_USR_0(escr, counter_config[ctr].user);
 522                                ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
 523                        } else {
 524                                ESCR_SET_USR_1(escr, counter_config[ctr].user);
 525                                ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
 526                        }
 527                        ESCR_SET_EVENT_SELECT(escr, ev->event_select);
 528                        ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
 529                        ESCR_WRITE(escr, high, ev, i);
 530
 531                        /* modify CCCR */
 532                        CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
 533                        CCCR_CLEAR(cccr);
 534                        CCCR_SET_REQUIRED_BITS(cccr);
 535                        CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
 536                        if (stag == 0)
 537                                CCCR_SET_PMI_OVF_0(cccr);
 538                        else
 539                                CCCR_SET_PMI_OVF_1(cccr);
 540                        CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
 541                        return;
 542                }
 543        }
 544
 545        printk(KERN_ERR
 546               "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
 547               counter_config[ctr].event, stag, ctr);
 548}
 549
 550
 551static void p4_setup_ctrs(struct op_msrs const * const msrs)
 552{
 553        unsigned int i;
 554        unsigned int low, high;
 555        unsigned int stag;
 556
 557        stag = get_stagger();
 558
 559        rdmsr(MSR_IA32_MISC_ENABLE, low, high);
 560        if (!MISC_PMC_ENABLED_P(low)) {
 561                printk(KERN_ERR "oprofile: P4 PMC not available\n");
 562                return;
 563        }
 564
 565        /* clear the cccrs we will use */
 566        for (i = 0 ; i < num_counters ; i++) {
 567                if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 568                        continue;
 569                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 570                CCCR_CLEAR(low);
 571                CCCR_SET_REQUIRED_BITS(low);
 572                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 573        }
 574
 575        /* clear all escrs (including those outside our concern) */
 576        for (i = num_counters; i < num_controls; i++) {
 577                if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 578                        continue;
 579                wrmsr(msrs->controls[i].addr, 0, 0);
 580        }
 581
 582        /* setup all counters */
 583        for (i = 0 ; i < num_counters ; ++i) {
 584                if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
 585                        reset_value[i] = counter_config[i].count;
 586                        pmc_setup_one_p4_counter(i);
 587                        CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
 588                } else {
 589                        reset_value[i] = 0;
 590                }
 591        }
 592}
 593
 594
 595static int p4_check_ctrs(struct pt_regs * const regs,
 596                         struct op_msrs const * const msrs)
 597{
 598        unsigned long ctr, low, high, stag, real;
 599        int i;
 600
 601        stag = get_stagger();
 602
 603        for (i = 0; i < num_counters; ++i) {
 604
 605                if (!reset_value[i])
 606                        continue;
 607
 608                /*
 609                 * there is some eccentricity in the hardware which
 610                 * requires that we perform 2 extra corrections:
 611                 *
 612                 * - check both the CCCR:OVF flag for overflow and the
 613                 *   counter high bit for un-flagged overflows.
 614                 *
 615                 * - write the counter back twice to ensure it gets
 616                 *   updated properly.
 617                 *
 618                 * the former seems to be related to extra NMIs happening
 619                 * during the current NMI; the latter is reported as errata
 620                 * N15 in intel doc 249199-029, pentium 4 specification
 621                 * update, though their suggested work-around does not
 622                 * appear to solve the problem.
 623                 */
 624
 625                real = VIRT_CTR(stag, i);
 626
 627                CCCR_READ(low, high, real);
 628                CTR_READ(ctr, high, real);
 629                if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
 630                        oprofile_add_sample(regs, i);
 631                        CTR_WRITE(reset_value[i], real);
 632                        CCCR_CLEAR_OVF(low);
 633                        CCCR_WRITE(low, high, real);
 634                        CTR_WRITE(reset_value[i], real);
 635                }
 636        }
 637
 638        /* P4 quirk: you have to re-unmask the apic vector */
 639        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
 640
 641        /* See op_model_ppro.c */
 642        return 1;
 643}
 644
 645
 646static void p4_start(struct op_msrs const * const msrs)
 647{
 648        unsigned int low, high, stag;
 649        int i;
 650
 651        stag = get_stagger();
 652
 653        for (i = 0; i < num_counters; ++i) {
 654                if (!reset_value[i])
 655                        continue;
 656                CCCR_READ(low, high, VIRT_CTR(stag, i));
 657                CCCR_SET_ENABLE(low);
 658                CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 659        }
 660}
 661
 662
 663static void p4_stop(struct op_msrs const * const msrs)
 664{
 665        unsigned int low, high, stag;
 666        int i;
 667
 668        stag = get_stagger();
 669
 670        for (i = 0; i < num_counters; ++i) {
 671                if (!reset_value[i])
 672                        continue;
 673                CCCR_READ(low, high, VIRT_CTR(stag, i));
 674                CCCR_SET_DISABLE(low);
 675                CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 676        }
 677}
 678
 679static void p4_shutdown(struct op_msrs const * const msrs)
 680{
 681        int i;
 682
 683        for (i = 0 ; i < num_counters ; ++i) {
 684                if (CTR_IS_RESERVED(msrs, i))
 685                        release_perfctr_nmi(msrs->counters[i].addr);
 686        }
 687        /*
 688         * some of the control registers are specially reserved in
 689         * conjunction with the counter registers (hence the starting offset).
 690         * This saves a few bits.
 691         */
 692        for (i = num_counters ; i < num_controls ; ++i) {
 693                if (CTRL_IS_RESERVED(msrs, i))
 694                        release_evntsel_nmi(msrs->controls[i].addr);
 695        }
 696}
 697
 698
 699#ifdef CONFIG_SMP
 700struct op_x86_model_spec const op_p4_ht2_spec = {
 701        .num_counters           = NUM_COUNTERS_HT2,
 702        .num_controls           = NUM_CONTROLS_HT2,
 703        .fill_in_addresses      = &p4_fill_in_addresses,
 704        .setup_ctrs             = &p4_setup_ctrs,
 705        .check_ctrs             = &p4_check_ctrs,
 706        .start                  = &p4_start,
 707        .stop                   = &p4_stop,
 708        .shutdown               = &p4_shutdown
 709};
 710#endif
 711
 712struct op_x86_model_spec const op_p4_spec = {
 713        .num_counters           = NUM_COUNTERS_NON_HT,
 714        .num_controls           = NUM_CONTROLS_NON_HT,
 715        .fill_in_addresses      = &p4_fill_in_addresses,
 716        .setup_ctrs             = &p4_setup_ctrs,
 717        .check_ctrs             = &p4_check_ctrs,
 718        .start                  = &p4_start,
 719        .stop                   = &p4_stop,
 720        .shutdown               = &p4_shutdown
 721};
 722