linux/arch/x86/kernel/cpu/perfctr-watchdog.c History
<<
>>
Prefs
   1/*
   2 * local apic based NMI watchdog for various CPUs.
   3 *
   4 * This file also handles reservation of performance counters for coordination
   5 * with other users (like oprofile).
   6 *
   7 * Note that these events normally don't tick when the CPU idles. This means
   8 * the frequency varies with CPU load.
   9 *
  10 * Original code for K7/P6 written by Keith Owens
  11 *
  12 */
  13
  14#include <linux/percpu.h>
  15#include <linux/module.h>
  16#include <linux/kernel.h>
  17#include <linux/bitops.h>
  18#include <linux/smp.h>
  19#include <linux/nmi.h>
  20#include <linux/kprobes.h>
  21
  22#include <asm/apic.h>
  23#include <asm/perf_event.h>
  24
  25struct nmi_watchdog_ctlblk {
  26        unsigned int cccr_msr;
  27        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
  28        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
  29};
  30
  31/* Interface defining a CPU specific perfctr watchdog */
  32struct wd_ops {
  33        int (*reserve)(void);
  34        void (*unreserve)(void);
  35        int (*setup)(unsigned nmi_hz);
  36        void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
  37        void (*stop)(void);
  38        unsigned perfctr;
  39        unsigned evntsel;
  40        u64 checkbit;
  41};
  42
  43static const struct wd_ops *wd_ops;
  44
  45/*
  46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  47 * offset from MSR_P4_BSU_ESCR0.
  48 *
  49 * It will be the max for all platforms (for now)
  50 */
  51#define NMI_MAX_COUNTER_BITS 66
  52
  53/*
  54 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
  55 * evtsel_nmi_owner tracks the ownership of the event selection
  56 * - different performance counters/ event selection may be reserved for
  57 *   different subsystems this reservation system just tries to coordinate
  58 *   things a little
  59 */
  60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
  61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
  62
  63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
  64
  65/* converts an msr to an appropriate reservation bit */
  66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
  67{
  68        /* returns the bit offset of the performance counter register */
  69        switch (boot_cpu_data.x86_vendor) {
  70        case X86_VENDOR_AMD:
  71                return msr - MSR_K7_PERFCTR0;
  72        case X86_VENDOR_INTEL:
  73                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
  74                        return msr - MSR_ARCH_PERFMON_PERFCTR0;
  75
  76                switch (boot_cpu_data.x86) {
  77                case 6:
  78                        return msr - MSR_P6_PERFCTR0;
  79                case 15:
  80                        return msr - MSR_P4_BPU_PERFCTR0;
  81                }
  82        }
  83        return 0;
  84}
  85
  86/*
  87 * converts an msr to an appropriate reservation bit
  88 * returns the bit offset of the event selection register
  89 */
  90static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
  91{
  92        /* returns the bit offset of the event selection register */
  93        switch (boot_cpu_data.x86_vendor) {
  94        case X86_VENDOR_AMD:
  95                return msr - MSR_K7_EVNTSEL0;
  96        case X86_VENDOR_INTEL:
  97                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
  98                        return msr - MSR_ARCH_PERFMON_EVENTSEL0;
  99
 100                switch (boot_cpu_data.x86) {
 101                case 6:
 102                        return msr - MSR_P6_EVNTSEL0;
 103                case 15:
 104                        return msr - MSR_P4_BSU_ESCR0;
 105                }
 106        }
 107        return 0;
 108
 109}
 110
 111/* checks for a bit availability (hack for oprofile) */
 112int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 113{
 114        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 115
 116        return !test_bit(counter, perfctr_nmi_owner);
 117}
 118
 119/* checks the an msr for availability */
 120int avail_to_resrv_perfctr_nmi(unsigned int msr)
 121{
 122        unsigned int counter;
 123
 124        counter = nmi_perfctr_msr_to_bit(msr);
 125        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 126
 127        return !test_bit(counter, perfctr_nmi_owner);
 128}
 129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 130
 131int reserve_perfctr_nmi(unsigned int msr)
 132{
 133        unsigned int counter;
 134
 135        counter = nmi_perfctr_msr_to_bit(msr);
 136        /* register not managed by the allocator? */
 137        if (counter > NMI_MAX_COUNTER_BITS)
 138                return 1;
 139
 140        if (!test_and_set_bit(counter, perfctr_nmi_owner))
 141                return 1;
 142        return 0;
 143}
 144EXPORT_SYMBOL(reserve_perfctr_nmi);
 145
 146void release_perfctr_nmi(unsigned int msr)
 147{
 148        unsigned int counter;
 149
 150        counter = nmi_perfctr_msr_to_bit(msr);
 151        /* register not managed by the allocator? */
 152        if (counter > NMI_MAX_COUNTER_BITS)
 153                return;
 154
 155        clear_bit(counter, perfctr_nmi_owner);
 156}
 157EXPORT_SYMBOL(release_perfctr_nmi);
 158
 159int reserve_evntsel_nmi(unsigned int msr)
 160{
 161        unsigned int counter;
 162
 163        counter = nmi_evntsel_msr_to_bit(msr);
 164        /* register not managed by the allocator? */
 165        if (counter > NMI_MAX_COUNTER_BITS)
 166                return 1;
 167
 168        if (!test_and_set_bit(counter, evntsel_nmi_owner))
 169                return 1;
 170        return 0;
 171}
 172EXPORT_SYMBOL(reserve_evntsel_nmi);
 173
 174void release_evntsel_nmi(unsigned int msr)
 175{
 176        unsigned int counter;
 177
 178        counter = nmi_evntsel_msr_to_bit(msr);
 179        /* register not managed by the allocator? */
 180        if (counter > NMI_MAX_COUNTER_BITS)
 181                return;
 182
 183        clear_bit(counter, evntsel_nmi_owner);
 184}
 185EXPORT_SYMBOL(release_evntsel_nmi);
 186
 187void disable_lapic_nmi_watchdog(void)
 188{
 189        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
 190
 191        if (atomic_read(&nmi_active) <= 0)
 192                return;
 193
 194        on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 195
 196        if (wd_ops)
 197                wd_ops->unreserve();
 198
 199        BUG_ON(atomic_read(&nmi_active) != 0);
 200}
 201
 202void enable_lapic_nmi_watchdog(void)
 203{
 204        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
 205
 206        /* are we already enabled */
 207        if (atomic_read(&nmi_active) != 0)
 208                return;
 209
 210        /* are we lapic aware */
 211        if (!wd_ops)
 212                return;
 213        if (!wd_ops->reserve()) {
 214                printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
 215                return;
 216        }
 217
 218        on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
 219        touch_nmi_watchdog();
 220}
 221
 222/*
 223 * Activate the NMI watchdog via the local APIC.
 224 */
 225
 226static unsigned int adjust_for_32bit_ctr(unsigned int hz)
 227{
 228        u64 counter_val;
 229        unsigned int retval = hz;
 230
 231        /*
 232         * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
 233         * are writable, with higher bits sign extending from bit 31.
 234         * So, we can only program the counter with 31 bit values and
 235         * 32nd bit should be 1, for 33.. to be 1.
 236         * Find the appropriate nmi_hz
 237         */
 238        counter_val = (u64)cpu_khz * 1000;
 239        do_div(counter_val, retval);
 240        if (counter_val > 0x7fffffffULL) {
 241                u64 count = (u64)cpu_khz * 1000;
 242                do_div(count, 0x7fffffffUL);
 243                retval = count + 1;
 244        }
 245        return retval;
 246}
 247
 248static void write_watchdog_counter(unsigned int perfctr_msr,
 249                                const char *descr, unsigned nmi_hz)
 250{
 251        u64 count = (u64)cpu_khz * 1000;
 252
 253        do_div(count, nmi_hz);
 254        if (descr)
 255                pr_debug("setting %s to -0x%08Lx\n", descr, count);
 256        wrmsrl(perfctr_msr, 0 - count);
 257}
 258
 259static void write_watchdog_counter32(unsigned int perfctr_msr,
 260                                const char *descr, unsigned nmi_hz)
 261{
 262        u64 count = (u64)cpu_khz * 1000;
 263
 264        do_div(count, nmi_hz);
 265        if (descr)
 266                pr_debug("setting %s to -0x%08Lx\n", descr, count);
 267        wrmsr(perfctr_msr, (u32)(-count), 0);
 268}
 269
 270/*
 271 * AMD K7/K8/Family10h/Family11h support.
 272 * AMD keeps this interface nicely stable so there is not much variety
 273 */
 274#define K7_EVNTSEL_ENABLE       (1 << 22)
 275#define K7_EVNTSEL_INT          (1 << 20)
 276#define K7_EVNTSEL_OS           (1 << 17)
 277#define K7_EVNTSEL_USR          (1 << 16)
 278#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
 279#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 280
 281static int setup_k7_watchdog(unsigned nmi_hz)
 282{
 283        unsigned int perfctr_msr, evntsel_msr;
 284        unsigned int evntsel;
 285        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 286
 287        perfctr_msr = wd_ops->perfctr;
 288        evntsel_msr = wd_ops->evntsel;
 289
 290        wrmsrl(perfctr_msr, 0UL);
 291
 292        evntsel = K7_EVNTSEL_INT
 293                | K7_EVNTSEL_OS
 294                | K7_EVNTSEL_USR
 295                | K7_NMI_EVENT;
 296
 297        /* setup the timer */
 298        wrmsr(evntsel_msr, evntsel, 0);
 299        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
 300
 301        /* initialize the wd struct before enabling */
 302        wd->perfctr_msr = perfctr_msr;
 303        wd->evntsel_msr = evntsel_msr;
 304        wd->cccr_msr = 0;  /* unused */
 305
 306        /* ok, everything is initialized, announce that we're set */
 307        cpu_nmi_set_wd_enabled();
 308
 309        apic_write(APIC_LVTPC, APIC_DM_NMI);
 310        evntsel |= K7_EVNTSEL_ENABLE;
 311        wrmsr(evntsel_msr, evntsel, 0);
 312
 313        return 1;
 314}
 315
 316static void single_msr_stop_watchdog(void)
 317{
 318        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 319
 320        wrmsr(wd->evntsel_msr, 0, 0);
 321}
 322
 323static int single_msr_reserve(void)
 324{
 325        if (!reserve_perfctr_nmi(wd_ops->perfctr))
 326                return 0;
 327
 328        if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
 329                release_perfctr_nmi(wd_ops->perfctr);
 330                return 0;
 331        }
 332        return 1;
 333}
 334
 335static void single_msr_unreserve(void)
 336{
 337        release_evntsel_nmi(wd_ops->evntsel);
 338        release_perfctr_nmi(wd_ops->perfctr);
 339}
 340
 341static void __kprobes
 342single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 343{
 344        /* start the cycle over again */
 345        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
 346}
 347
 348static const struct wd_ops k7_wd_ops = {
 349        .reserve        = single_msr_reserve,
 350        .unreserve      = single_msr_unreserve,
 351        .setup          = setup_k7_watchdog,
 352        .rearm          = single_msr_rearm,
 353        .stop           = single_msr_stop_watchdog,
 354        .perfctr        = MSR_K7_PERFCTR0,
 355        .evntsel        = MSR_K7_EVNTSEL0,
 356        .checkbit       = 1ULL << 47,
 357};
 358
 359/*
 360 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 361 */
 362#define P6_EVNTSEL0_ENABLE      (1 << 22)
 363#define P6_EVNTSEL_INT          (1 << 20)
 364#define P6_EVNTSEL_OS           (1 << 17)
 365#define P6_EVNTSEL_USR          (1 << 16)
 366#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
 367#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED
 368
 369static int setup_p6_watchdog(unsigned nmi_hz)
 370{
 371        unsigned int perfctr_msr, evntsel_msr;
 372        unsigned int evntsel;
 373        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 374
 375        perfctr_msr = wd_ops->perfctr;
 376        evntsel_msr = wd_ops->evntsel;
 377
 378        /* KVM doesn't implement this MSR */
 379        if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
 380                return 0;
 381
 382        evntsel = P6_EVNTSEL_INT
 383                | P6_EVNTSEL_OS
 384                | P6_EVNTSEL_USR
 385                | P6_NMI_EVENT;
 386
 387        /* setup the timer */
 388        wrmsr(evntsel_msr, evntsel, 0);
 389        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 390        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
 391
 392        /* initialize the wd struct before enabling */
 393        wd->perfctr_msr = perfctr_msr;
 394        wd->evntsel_msr = evntsel_msr;
 395        wd->cccr_msr = 0;  /* unused */
 396
 397        /* ok, everything is initialized, announce that we're set */
 398        cpu_nmi_set_wd_enabled();
 399
 400        apic_write(APIC_LVTPC, APIC_DM_NMI);
 401        evntsel |= P6_EVNTSEL0_ENABLE;
 402        wrmsr(evntsel_msr, evntsel, 0);
 403
 404        return 1;
 405}
 406
 407static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 408{
 409        /*
 410         * P6 based Pentium M need to re-unmask
 411         * the apic vector but it doesn't hurt
 412         * other P6 variant.
 413         * ArchPerfom/Core Duo also needs this
 414         */
 415        apic_write(APIC_LVTPC, APIC_DM_NMI);
 416
 417        /* P6/ARCH_PERFMON has 32 bit counter write */
 418        write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
 419}
 420
 421static const struct wd_ops p6_wd_ops = {
 422        .reserve        = single_msr_reserve,
 423        .unreserve      = single_msr_unreserve,
 424        .setup          = setup_p6_watchdog,
 425        .rearm          = p6_rearm,
 426        .stop           = single_msr_stop_watchdog,
 427        .perfctr        = MSR_P6_PERFCTR0,
 428        .evntsel        = MSR_P6_EVNTSEL0,
 429        .checkbit       = 1ULL << 39,
 430};
 431
 432/*
 433 * Intel P4 performance counters.
 434 * By far the most complicated of all.
 435 */
 436#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1 << 7)
 437#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
 438#define P4_ESCR_OS              (1 << 3)
 439#define P4_ESCR_USR             (1 << 2)
 440#define P4_CCCR_OVF_PMI0        (1 << 26)
 441#define P4_CCCR_OVF_PMI1        (1 << 27)
 442#define P4_CCCR_THRESHOLD(N)    ((N) << 20)
 443#define P4_CCCR_COMPLEMENT      (1 << 19)
 444#define P4_CCCR_COMPARE         (1 << 18)
 445#define P4_CCCR_REQUIRED        (3 << 16)
 446#define P4_CCCR_ESCR_SELECT(N)  ((N) << 13)
 447#define P4_CCCR_ENABLE          (1 << 12)
 448#define P4_CCCR_OVF             (1 << 31)
 449
 450#define P4_CONTROLS 18
 451static unsigned int p4_controls[18] = {
 452        MSR_P4_BPU_CCCR0,
 453        MSR_P4_BPU_CCCR1,
 454        MSR_P4_BPU_CCCR2,
 455        MSR_P4_BPU_CCCR3,
 456        MSR_P4_MS_CCCR0,
 457        MSR_P4_MS_CCCR1,
 458        MSR_P4_MS_CCCR2,
 459        MSR_P4_MS_CCCR3,
 460        MSR_P4_FLAME_CCCR0,
 461        MSR_P4_FLAME_CCCR1,
 462        MSR_P4_FLAME_CCCR2,
 463        MSR_P4_FLAME_CCCR3,
 464        MSR_P4_IQ_CCCR0,
 465        MSR_P4_IQ_CCCR1,
 466        MSR_P4_IQ_CCCR2,
 467        MSR_P4_IQ_CCCR3,
 468        MSR_P4_IQ_CCCR4,
 469        MSR_P4_IQ_CCCR5,
 470};
 471/*
 472 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 473 * CRU_ESCR0 (with any non-null event selector) through a complemented
 474 * max threshold. [IA32-Vol3, Section 14.9.9]
 475 */
 476static int setup_p4_watchdog(unsigned nmi_hz)
 477{
 478        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
 479        unsigned int evntsel, cccr_val;
 480        unsigned int misc_enable, dummy;
 481        unsigned int ht_num;
 482        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 483
 484        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
 485        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 486                return 0;
 487
 488#ifdef CONFIG_SMP
 489        /* detect which hyperthread we are on */
 490        if (smp_num_siblings == 2) {
 491                unsigned int ebx, apicid;
 492
 493                ebx = cpuid_ebx(1);
 494                apicid = (ebx >> 24) & 0xff;
 495                ht_num = apicid & 1;
 496        } else
 497#endif
 498                ht_num = 0;
 499
 500        /*
 501         * performance counters are shared resources
 502         * assign each hyperthread its own set
 503         * (re-use the ESCR0 register, seems safe
 504         * and keeps the cccr_val the same)
 505         */
 506        if (!ht_num) {
 507                /* logical cpu 0 */
 508                perfctr_msr = MSR_P4_IQ_PERFCTR0;
 509                evntsel_msr = MSR_P4_CRU_ESCR0;
 510                cccr_msr = MSR_P4_IQ_CCCR0;
 511                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
 512
 513                /*
 514                 * If we're on the kdump kernel or other situation, we may
 515                 * still have other performance counter registers set to
 516                 * interrupt and they'll keep interrupting forever because
 517                 * of the P4_CCCR_OVF quirk. So we need to ACK all the
 518                 * pending interrupts and disable all the registers here,
 519                 * before reenabling the NMI delivery. Refer to p4_rearm()
 520                 * about the P4_CCCR_OVF quirk.
 521                 */
 522                if (reset_devices) {
 523                        unsigned int low, high;
 524                        int i;
 525
 526                        for (i = 0; i < P4_CONTROLS; i++) {
 527                                rdmsr(p4_controls[i], low, high);
 528                                low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
 529                                wrmsr(p4_controls[i], low, high);
 530                        }
 531                }
 532        } else {
 533                /* logical cpu 1 */
 534                perfctr_msr = MSR_P4_IQ_PERFCTR1;
 535                evntsel_msr = MSR_P4_CRU_ESCR0;
 536                cccr_msr = MSR_P4_IQ_CCCR1;
 537
 538                /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
 539                if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
 540                        cccr_val = P4_CCCR_OVF_PMI0;
 541                else
 542                        cccr_val = P4_CCCR_OVF_PMI1;
 543                cccr_val |= P4_CCCR_ESCR_SELECT(4);
 544        }
 545
 546        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
 547                | P4_ESCR_OS
 548                | P4_ESCR_USR;
 549
 550        cccr_val |= P4_CCCR_THRESHOLD(15)
 551                 | P4_CCCR_COMPLEMENT
 552                 | P4_CCCR_COMPARE
 553                 | P4_CCCR_REQUIRED;
 554
 555        wrmsr(evntsel_msr, evntsel, 0);
 556        wrmsr(cccr_msr, cccr_val, 0);
 557        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
 558
 559        wd->perfctr_msr = perfctr_msr;
 560        wd->evntsel_msr = evntsel_msr;
 561        wd->cccr_msr = cccr_msr;
 562
 563        /* ok, everything is initialized, announce that we're set */
 564        cpu_nmi_set_wd_enabled();
 565
 566        apic_write(APIC_LVTPC, APIC_DM_NMI);
 567        cccr_val |= P4_CCCR_ENABLE;
 568        wrmsr(cccr_msr, cccr_val, 0);
 569        return 1;
 570}
 571
 572static void stop_p4_watchdog(void)
 573{
 574        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 575        wrmsr(wd->cccr_msr, 0, 0);
 576        wrmsr(wd->evntsel_msr, 0, 0);
 577}
 578
 579static int p4_reserve(void)
 580{
 581        if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
 582                return 0;
 583#ifdef CONFIG_SMP
 584        if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
 585                goto fail1;
 586#endif
 587        if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
 588                goto fail2;
 589        /* RED-PEN why is ESCR1 not reserved here? */
 590        return 1;
 591 fail2:
 592#ifdef CONFIG_SMP
 593        if (smp_num_siblings > 1)
 594                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 595 fail1:
 596#endif
 597        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 598        return 0;
 599}
 600
 601static void p4_unreserve(void)
 602{
 603#ifdef CONFIG_SMP
 604        if (smp_num_siblings > 1)
 605                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 606#endif
 607        release_evntsel_nmi(MSR_P4_CRU_ESCR0);
 608        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 609}
 610
 611static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 612{
 613        unsigned dummy;
 614        /*
 615         * P4 quirks:
 616         * - An overflown perfctr will assert its interrupt
 617         *   until the OVF flag in its CCCR is cleared.
 618         * - LVTPC is masked on interrupt and must be
 619         *   unmasked by the LVTPC handler.
 620         */
 621        rdmsrl(wd->cccr_msr, dummy);
 622        dummy &= ~P4_CCCR_OVF;
 623        wrmsrl(wd->cccr_msr, dummy);
 624        apic_write(APIC_LVTPC, APIC_DM_NMI);
 625        /* start the cycle over again */
 626        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
 627}
 628
 629static const struct wd_ops p4_wd_ops = {
 630        .reserve        = p4_reserve,
 631        .unreserve      = p4_unreserve,
 632        .setup          = setup_p4_watchdog,
 633        .rearm          = p4_rearm,
 634        .stop           = stop_p4_watchdog,
 635        /* RED-PEN this is wrong for the other sibling */
 636        .perfctr        = MSR_P4_BPU_PERFCTR0,
 637        .evntsel        = MSR_P4_BSU_ESCR0,
 638        .checkbit       = 1ULL << 39,
 639};
 640
 641/*
 642 * Watchdog using the Intel architected PerfMon.
 643 * Used for Core2 and hopefully all future Intel CPUs.
 644 */
 645#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
 646#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
 647
 648static struct wd_ops intel_arch_wd_ops;
 649
 650static int setup_intel_arch_watchdog(unsigned nmi_hz)
 651{
 652        unsigned int ebx;
 653        union cpuid10_eax eax;
 654        unsigned int unused;
 655        unsigned int perfctr_msr, evntsel_msr;
 656        unsigned int evntsel;
 657        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 658
 659        /*
 660         * Check whether the Architectural PerfMon supports
 661         * Unhalted Core Cycles Event or not.
 662         * NOTE: Corresponding bit = 0 in ebx indicates event present.
 663         */
 664        cpuid(10, &(eax.full), &ebx, &unused, &unused);
 665        if ((eax.split.mask_length <
 666                        (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
 667            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
 668                return 0;
 669
 670        perfctr_msr = wd_ops->perfctr;
 671        evntsel_msr = wd_ops->evntsel;
 672
 673        wrmsrl(perfctr_msr, 0UL);
 674
 675        evntsel = ARCH_PERFMON_EVENTSEL_INT
 676                | ARCH_PERFMON_EVENTSEL_OS
 677                | ARCH_PERFMON_EVENTSEL_USR
 678                | ARCH_PERFMON_NMI_EVENT_SEL
 679                | ARCH_PERFMON_NMI_EVENT_UMASK;
 680
 681        /* setup the timer */
 682        wrmsr(evntsel_msr, evntsel, 0);
 683        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 684        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
 685
 686        wd->perfctr_msr = perfctr_msr;
 687        wd->evntsel_msr = evntsel_msr;
 688        wd->cccr_msr = 0;  /* unused */
 689
 690        /* ok, everything is initialized, announce that we're set */
 691        cpu_nmi_set_wd_enabled();
 692
 693        apic_write(APIC_LVTPC, APIC_DM_NMI);
 694        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 695        wrmsr(evntsel_msr, evntsel, 0);
 696        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
 697        return 1;
 698}
 699
 700static struct wd_ops intel_arch_wd_ops __read_mostly = {
 701        .reserve        = single_msr_reserve,
 702        .unreserve      = single_msr_unreserve,
 703        .setup          = setup_intel_arch_watchdog,
 704        .rearm          = p6_rearm,
 705        .stop           = single_msr_stop_watchdog,
 706        .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
 707        .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
 708};
 709
 710static void probe_nmi_watchdog(void)
 711{
 712        switch (boot_cpu_data.x86_vendor) {
 713        case X86_VENDOR_AMD:
 714                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
 715                    boot_cpu_data.x86 != 16)
 716                        return;
 717                wd_ops = &k7_wd_ops;
 718                break;
 719        case X86_VENDOR_INTEL:
 720                /* Work around where perfctr1 doesn't have a working enable
 721                 * bit as described in the following errata:
 722                 * AE49 Core Duo and Intel Core Solo 65 nm
 723                 * AN49 Intel Pentium Dual-Core
 724                 * AF49 Dual-Core Intel Xeon Processor LV
 725                 */
 726                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
 727                    ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
 728                     boot_cpu_data.x86_mask == 4))) {
 729                        intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
 730                        intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
 731                }
 732                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 733                        wd_ops = &intel_arch_wd_ops;
 734                        break;
 735                }
 736                switch (boot_cpu_data.x86) {
 737                case 6:
 738                        if (boot_cpu_data.x86_model > 13)
 739                                return;
 740
 741                        wd_ops = &p6_wd_ops;
 742                        break;
 743                case 15:
 744                        wd_ops = &p4_wd_ops;
 745                        break;
 746                default:
 747                        return;
 748                }
 749                break;
 750        }
 751}
 752
 753/* Interface to nmi.c */
 754
 755int lapic_watchdog_init(unsigned nmi_hz)
 756{
 757        if (!wd_ops) {
 758                probe_nmi_watchdog();
 759                if (!wd_ops) {
 760                        printk(KERN_INFO "NMI watchdog: CPU not supported\n");
 761                        return -1;
 762                }
 763
 764                if (!wd_ops->reserve()) {
 765                        printk(KERN_ERR
 766                                "NMI watchdog: cannot reserve perfctrs\n");
 767                        return -1;
 768                }
 769        }
 770
 771        if (!(wd_ops->setup(nmi_hz))) {
 772                printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
 773                       raw_smp_processor_id());
 774                return -1;
 775        }
 776
 777        return 0;
 778}
 779
 780void lapic_watchdog_stop(void)
 781{
 782        if (wd_ops)
 783                wd_ops->stop();
 784}
 785
 786unsigned lapic_adjust_nmi_hz(unsigned hz)
 787{
 788        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 789        if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
 790            wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
 791                hz = adjust_for_32bit_ctr(hz);
 792        return hz;
 793}
 794
 795int __kprobes lapic_wd_event(unsigned nmi_hz)
 796{
 797        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 798        u64 ctr;
 799
 800        rdmsrl(wd->perfctr_msr, ctr);
 801        if (ctr & wd_ops->checkbit) /* perfctr still running? */
 802                return 0;
 803
 804        wd_ops->rearm(wd, nmi_hz);
 805        return 1;
 806}
 807
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.