linux-old/arch/i386/kernel/nmi.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/i386/nmi.c
   3 *
   4 *  NMI watchdog support on APIC systems
   5 *
   6 *  Started by Ingo Molnar <mingo@redhat.com>
   7 *
   8 *  Fixes:
   9 *  Mikael Pettersson   : AMD K7 support for local APIC NMI watchdog.
  10 *  Mikael Pettersson   : Power Management for local APIC NMI watchdog.
  11 *  Mikael Pettersson   : Pentium 4 support for local APIC NMI watchdog.
  12 */
  13
  14#include <linux/config.h>
  15#include <linux/mm.h>
  16#include <linux/irq.h>
  17#include <linux/delay.h>
  18#include <linux/bootmem.h>
  19#include <linux/smp_lock.h>
  20#include <linux/interrupt.h>
  21#include <linux/mc146818rtc.h>
  22#include <linux/kernel_stat.h>
  23
  24#include <asm/smp.h>
  25#include <asm/mtrr.h>
  26#include <asm/mpspec.h>
  27
/* Selected watchdog mode; changed by the "nmi_watchdog=" boot option handler. */
unsigned int nmi_watchdog = NMI_NONE;
/* Watchdog NMI rate: starts at HZ, lowered to 1 by check_nmi_watchdog()
   once a local-APIC watchdog has passed its self test. */
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr;   /* the MSR to reset in NMI handler */
/* Defined outside this file; nmi_watchdog_tick() calls it to dump registers. */
extern void show_registers(struct pt_regs *regs);
  32
  33#define K7_EVNTSEL_ENABLE       (1 << 22)
  34#define K7_EVNTSEL_INT          (1 << 20)
  35#define K7_EVNTSEL_OS           (1 << 17)
  36#define K7_EVNTSEL_USR          (1 << 16)
  37#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
  38#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
  39
  40#define P6_EVNTSEL0_ENABLE      (1 << 22)
  41#define P6_EVNTSEL_INT          (1 << 20)
  42#define P6_EVNTSEL_OS           (1 << 17)
  43#define P6_EVNTSEL_USR          (1 << 16)
  44#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
  45#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED
  46
  47#define MSR_P4_MISC_ENABLE      0x1A0
  48#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1<<7)
  49#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
  50#define MSR_P4_PERFCTR0         0x300
  51#define MSR_P4_CCCR0            0x360
  52#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
  53#define P4_ESCR_OS              (1<<3)
  54#define P4_ESCR_USR             (1<<2)
  55#define P4_CCCR_OVF_PMI         (1<<26)
  56#define P4_CCCR_THRESHOLD(N)    ((N)<<20)
  57#define P4_CCCR_COMPLEMENT      (1<<19)
  58#define P4_CCCR_COMPARE         (1<<18)
  59#define P4_CCCR_REQUIRED        (3<<16)
  60#define P4_CCCR_ESCR_SELECT(N)  ((N)<<13)
  61#define P4_CCCR_ENABLE          (1<<12)
  62/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
  63   CRU_ESCR0 (with any non-null event selector) through a complemented
  64   max threshold. [IA32-Vol3, Section 14.9.9] */
  65#define MSR_P4_IQ_COUNTER0      0x30C
  66#define MSR_P4_IQ_CCCR0         0x36C
  67#define MSR_P4_CRU_ESCR0        0x3B8
  68#define P4_NMI_CRU_ESCR0        (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
  69#define P4_NMI_IQ_CCCR0 \
  70        (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|      \
  71         P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
  72
  73int __init check_nmi_watchdog (void)
  74{
  75        unsigned int prev_nmi_count[NR_CPUS];
  76        int j, cpu;
  77
  78        printk(KERN_INFO "testing NMI watchdog ... ");
  79
  80        for (j = 0; j < smp_num_cpus; j++) {
  81                cpu = cpu_logical_map(j);
  82                prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
  83        }
  84        sti();
  85        mdelay((10*1000)/nmi_hz); // wait 10 ticks
  86
  87        for (j = 0; j < smp_num_cpus; j++) {
  88                cpu = cpu_logical_map(j);
  89                if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
  90                        printk("CPU#%d: NMI appears to be stuck!\n", cpu);
  91                        return -1;
  92                }
  93        }
  94        printk("OK.\n");
  95
  96        /* now that we know it works we can reduce NMI frequency to
  97           something more reasonable; makes a difference in some configs */
  98        if (nmi_watchdog == NMI_LOCAL_APIC)
  99                nmi_hz = 1;
 100
 101        return 0;
 102}
 103
 104static int __init setup_nmi_watchdog(char *str)
 105{
 106        int nmi;
 107
 108        get_option(&str, &nmi);
 109
 110        if (nmi >= NMI_INVALID)
 111                return 0;
 112        if (nmi == NMI_NONE)
 113                nmi_watchdog = nmi;
 114        /*
 115         * If any other x86 CPU has a local APIC, then
 116         * please test the NMI stuff there and send me the
 117         * missing bits. Right now Intel P6/P4 and AMD K7 only.
 118         */
 119        if ((nmi == NMI_LOCAL_APIC) &&
 120                        (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
 121                        (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
 122                nmi_watchdog = nmi;
 123        if ((nmi == NMI_LOCAL_APIC) &&
 124                        (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
 125                    ((boot_cpu_data.x86 == 6) || (boot_cpu_data.x86 == 15)))
 126                nmi_watchdog = nmi;
 127        /*
 128         * We can enable the IO-APIC watchdog
 129         * unconditionally.
 130         */
 131        if (nmi == NMI_IO_APIC)
 132                nmi_watchdog = nmi;
 133        return 1;
 134}
 135
 136__setup("nmi_watchdog=", setup_nmi_watchdog);
 137
 138#ifdef CONFIG_PM
 139
 140#include <linux/pm.h>
 141
 142struct pm_dev *nmi_pmdev;
 143
 144static void disable_apic_nmi_watchdog(void)
 145{
 146        switch (boot_cpu_data.x86_vendor) {
 147        case X86_VENDOR_AMD:
 148                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
 149                break;
 150        case X86_VENDOR_INTEL:
 151                switch (boot_cpu_data.x86) {
 152                case 6:
 153                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
 154                        break;
 155                case 15:
 156                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
 157                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
 158                        break;
 159                }
 160                break;
 161        }
 162}
 163
 164static int nmi_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
 165{
 166        switch (rqst) {
 167        case PM_SUSPEND:
 168                disable_apic_nmi_watchdog();
 169                break;
 170        case PM_RESUME:
 171                setup_apic_nmi_watchdog();
 172                break;
 173        }
 174        return 0;
 175}
 176
 177static void nmi_pm_init(void)
 178{
 179        if (!nmi_pmdev)
 180                nmi_pmdev = apic_pm_register(PM_SYS_DEV, 0, nmi_pm_callback);
 181}
 182
 183#define __pminit        /*empty*/
 184
 185#else   /* CONFIG_PM */
 186
 187static inline void nmi_pm_init(void) { }
 188
 189#define __pminit        __init
 190
 191#endif  /* CONFIG_PM */
 192
 193/*
 194 * Activate the NMI watchdog via the local APIC.
 195 * Original code written by Keith Owens.
 196 */
 197
 198static void __pminit clear_msr_range(unsigned int base, unsigned int n)
 199{
 200        unsigned int i;
 201
 202        for(i = 0; i < n; ++i)
 203                wrmsr(base+i, 0, 0);
 204}
 205
 206static void __pminit setup_k7_watchdog(void)
 207{
 208        unsigned int evntsel;
 209
 210        nmi_perfctr_msr = MSR_K7_PERFCTR0;
 211
 212        clear_msr_range(MSR_K7_EVNTSEL0, 4);
 213        clear_msr_range(MSR_K7_PERFCTR0, 4);
 214
 215        evntsel = K7_EVNTSEL_INT
 216                | K7_EVNTSEL_OS
 217                | K7_EVNTSEL_USR
 218                | K7_NMI_EVENT;
 219
 220        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 221        Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
 222        wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
 223        apic_write(APIC_LVTPC, APIC_DM_NMI);
 224        evntsel |= K7_EVNTSEL_ENABLE;
 225        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 226}
 227
 228static void __pminit setup_p6_watchdog(void)
 229{
 230        unsigned int evntsel;
 231
 232        nmi_perfctr_msr = MSR_P6_PERFCTR0;
 233
 234        clear_msr_range(MSR_P6_EVNTSEL0, 2);
 235        clear_msr_range(MSR_P6_PERFCTR0, 2);
 236
 237        evntsel = P6_EVNTSEL_INT
 238                | P6_EVNTSEL_OS
 239                | P6_EVNTSEL_USR
 240                | P6_NMI_EVENT;
 241
 242        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
 243        Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
 244        wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
 245        apic_write(APIC_LVTPC, APIC_DM_NMI);
 246        evntsel |= P6_EVNTSEL0_ENABLE;
 247        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
 248}
 249
 250static int __pminit setup_p4_watchdog(void)
 251{
 252        unsigned int misc_enable, dummy;
 253
 254        rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
 255        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 256                return 0;
 257
 258        nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
 259
 260        if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
 261                clear_msr_range(0x3F1, 2);
 262        /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
 263           docs doesn't fully define it, so leave it alone for now. */
 264        clear_msr_range(0x3A0, 31);
 265        clear_msr_range(0x3C0, 6);
 266        clear_msr_range(0x3C8, 6);
 267        clear_msr_range(0x3E0, 2);
 268        clear_msr_range(MSR_P4_CCCR0, 18);
 269        clear_msr_range(MSR_P4_PERFCTR0, 18);
 270
 271        wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
 272        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
 273        Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
 274        wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
 275        apic_write(APIC_LVTPC, APIC_DM_NMI);
 276        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
 277        return 1;
 278}
 279
 280void __pminit setup_apic_nmi_watchdog (void)
 281{
 282        switch (boot_cpu_data.x86_vendor) {
 283        case X86_VENDOR_AMD:
 284                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
 285                        return;
 286                setup_k7_watchdog();
 287                break;
 288        case X86_VENDOR_INTEL:
 289                switch (boot_cpu_data.x86) {
 290                case 6:
 291                        setup_p6_watchdog();
 292                        break;
 293                case 15:
 294                        if (!setup_p4_watchdog())
 295                                return;
 296                        break;
 297                default:
 298                        return;
 299                }
 300                break;
 301        default:
 302                return;
 303        }
 304        nmi_pm_init();
 305}
 306
/* Held by nmi_watchdog_tick() while it prints its lockup report. */
static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
 308
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */
 323
/*
 * Per-CPU watchdog state, indexed by CPU number:
 *   last_irq_sums - apic_timer_irqs value seen at the previous watchdog NMI
 *   alert_counter - consecutive watchdog NMIs during which it did not change
 */
static unsigned int
        last_irq_sums [NR_CPUS],
        alert_counter [NR_CPUS];
 327
 328void touch_nmi_watchdog (void)
 329{
 330        int i;
 331
 332        /*
 333         * Just reset the alert counters, (other CPUs might be
 334         * spinning on locks we hold):
 335         */
 336        for (i = 0; i < smp_num_cpus; i++)
 337                alert_counter[i] = 0;
 338}
 339
 340void nmi_watchdog_tick (struct pt_regs * regs)
 341{
 342
 343        /*
 344         * Since current-> is always on the stack, and we always switch
 345         * the stack NMI-atomically, it's safe to use smp_processor_id().
 346         */
 347        int sum, cpu = smp_processor_id();
 348
 349        sum = apic_timer_irqs[cpu];
 350
 351        if (last_irq_sums[cpu] == sum) {
 352                /*
 353                 * Ayiee, looks like this CPU is stuck ...
 354                 * wait a few IRQs (5 seconds) before doing the oops ...
 355                 */
 356                alert_counter[cpu]++;
 357                if (alert_counter[cpu] == 5*nmi_hz) {
 358                        spin_lock(&nmi_print_lock);
 359                        /*
 360                         * We are in trouble anyway, lets at least try
 361                         * to get a message out.
 362                         */
 363                        bust_spinlocks(1);
 364                        printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
 365                        show_registers(regs);
 366                        printk("console shuts up ...\n");
 367                        console_silent();
 368                        spin_unlock(&nmi_print_lock);
 369                        bust_spinlocks(0);
 370                        do_exit(SIGSEGV);
 371                }
 372        } else {
 373                last_irq_sums[cpu] = sum;
 374                alert_counter[cpu] = 0;
 375        }
 376        if (nmi_perfctr_msr) {
 377                if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
 378                        /*
 379                         * P4 quirks:
 380                         * - An overflown perfctr will assert its interrupt
 381                         *   until the OVF flag in its CCCR is cleared.
 382                         * - LVTPC is masked on interrupt and must be
 383                         *   unmasked by the LVTPC handler.
 384                         */
 385                        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
 386                        apic_write(APIC_LVTPC, APIC_DM_NMI);
 387                }
 388                wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
 389        }
 390}
 391
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.