linux/arch/x86/oprofile/op_model_ppro.c
/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 */

#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>

#include "op_x86_model.h"
#include "op_counter.h"

static int num_counters = 2;
static int counter_width = 32;

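/*
 * Counters are programmed with -(reset count) and count upwards, so a
 * counter that has wrapped past zero has its top bit clear; CTR_OVERFLOWED()
 * tests exactly that bit.
 */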
#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))

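/*
 * The CTRL_SET_* helpers below assemble a P6/architectural PERFEVTSEL value:
 * bits 0-7 select the event, bits 8-15 the unit mask, bit 16 enables user
 * mode counting, bit 17 kernel mode counting, bit 20 the overflow interrupt
 * and bit 22 enables the counter itself.
 */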
#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
#define CTRL_SET_ENABLE(val) (val |= 1<<20)
#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e)

static u64 *reset_value;

static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
        int i;

        for (i = 0; i < num_counters; i++) {
                if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
                        msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
                else
                        msrs->counters[i].addr = 0;
        }

        for (i = 0; i < num_counters; i++) {
                if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
                        msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
                else
                        msrs->controls[i].addr = 0;
        }
}


static void ppro_setup_ctrs(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value) {
                reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
                                        GFP_ATOMIC);
                if (!reset_value)
                        return;
        }

        if (cpu_has_arch_perfmon) {
                union cpuid10_eax eax;
                eax.full = cpuid_eax(0xa);
                if (counter_width < eax.split.bit_width)
                        counter_width = eax.split.bit_width;
        }

        /* clear all counters */
        for (i = 0 ; i < num_counters; ++i) {
                if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
                        continue;
                CTRL_READ(low, high, msrs, i);
                CTRL_CLEAR(low);
                CTRL_WRITE(low, high, msrs, i);
        }

        /* avoid a false detection of ctr overflows in NMI handler */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!CTR_IS_RESERVED(msrs, i)))
                        continue;
                wrmsrl(msrs->counters[i].addr, -1LL);
        }

        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
                        reset_value[i] = counter_config[i].count;

                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);

                        CTRL_READ(low, high, msrs, i);
                        CTRL_CLEAR(low);
                        CTRL_SET_ENABLE(low);
                        CTRL_SET_USR(low, counter_config[i].user);
                        CTRL_SET_KERN(low, counter_config[i].kernel);
                        CTRL_SET_UM(low, counter_config[i].unit_mask);
                        CTRL_SET_EVENT(low, counter_config[i].event);
                        CTRL_WRITE(low, high, msrs, i);
                } else {
                        reset_value[i] = 0;
                }
        }
}


static int ppro_check_ctrs(struct pt_regs * const regs,
                           struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        for (i = 0 ; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                if (CTR_OVERFLOWED(val)) {
                        oprofile_add_sample(regs, i);
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                }
        }

        /* Only the P6-based Pentium M needs to re-unmask the APIC vector,
         * but it doesn't hurt the other P6 variants */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

        /* We can't tell whether we really handled an interrupt. We
         * might have caught a *second* counter just after it overflowed;
         * the interrupt for that counter then arrives later, we don't
         * find a counter that has overflowed, and we would return 0
         * and get dazed + confused. Instead we always assume we found
         * an overflow. This sucks.
         */
        return 1;
}


static void ppro_start(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
                        CTRL_READ(low, high, msrs, i);
                        CTRL_SET_ACTIVE(low);
                        CTRL_WRITE(low, high, msrs, i);
                }
        }
}


static void ppro_stop(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                CTRL_READ(low, high, msrs, i);
                CTRL_SET_INACTIVE(low);
                CTRL_WRITE(low, high, msrs, i);
        }
}

static void ppro_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0 ; i < num_counters ; ++i) {
                if (CTR_IS_RESERVED(msrs, i))
                        release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
        }
        for (i = 0 ; i < num_counters ; ++i) {
                if (CTRL_IS_RESERVED(msrs, i))
                        release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
        }
        if (reset_value) {
                kfree(reset_value);
                reset_value = NULL;
        }
}


struct op_x86_model_spec op_ppro_spec = {
        .num_counters           = 2,    /* can be overridden */
        .num_controls           = 2,    /* ditto */
        .fill_in_addresses      = &ppro_fill_in_addresses,
        .setup_ctrs             = &ppro_setup_ctrs,
        .check_ctrs             = &ppro_check_ctrs,
        .start                  = &ppro_start,
        .stop                   = &ppro_stop,
        .shutdown               = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
 * The advantage of this is that it can be done without knowing about
 * the specific CPU.
 */

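/*
 * CPUID leaf 0xA reports the architectural perfmon capabilities in EAX:
 * bits 0-7 hold the version ID, bits 8-15 the number of general-purpose
 * counters and bits 16-23 their bit width, which is what the setup
 * routine below reads via union cpuid10_eax.
 */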
void arch_perfmon_setup_counters(void)
{
        union cpuid10_eax eax;

        eax.full = cpuid_eax(0xa);

        /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
        if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
                current_cpu_data.x86_model == 15) {
                eax.split.version_id = 2;
                eax.split.num_counters = 2;
                eax.split.bit_width = 40;
        }

        num_counters = eax.split.num_counters;

        op_arch_perfmon_spec.num_counters = num_counters;
        op_arch_perfmon_spec.num_controls = num_counters;
        op_ppro_spec.num_counters = num_counters;
        op_ppro_spec.num_controls = num_counters;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
        /* num_counters/num_controls filled in at runtime */
        .fill_in_addresses      = &ppro_fill_in_addresses,
        /* user space does the cpuid check for available events */
        .setup_ctrs             = &ppro_setup_ctrs,
        .check_ctrs             = &ppro_check_ctrs,
        .start                  = &ppro_start,
        .stop                   = &ppro_stop,
        .shutdown               = &ppro_shutdown
};