linux/arch/x86/kernel/cpu/mcheck/p4.c
<<
>>
Prefs
   1/*
   2 * P4 specific Machine Check Exception Reporting
   3 */
   4
   5#include <linux/init.h>
   6#include <linux/types.h>
   7#include <linux/kernel.h>
   8#include <linux/interrupt.h>
   9#include <linux/smp.h>
  10
  11#include <asm/processor.h>
  12#include <asm/system.h>
  13#include <asm/msr.h>
  14#include <asm/apic.h>
  15
  16#include <asm/therm_throt.h>
  17
  18#include "mce.h"
  19
  20/* as supported by the P4/Xeon family */
  21struct intel_mce_extended_msrs {
  22        u32 eax;
  23        u32 ebx;
  24        u32 ecx;
  25        u32 edx;
  26        u32 esi;
  27        u32 edi;
  28        u32 ebp;
  29        u32 esp;
  30        u32 eflags;
  31        u32 eip;
  32        /* u32 *reserved[]; */
  33};
  34
  35static int mce_num_extended_msrs;
  36
  37
  38#ifdef CONFIG_X86_MCE_P4THERMAL
  39static void unexpected_thermal_interrupt(struct pt_regs *regs)
  40{
  41        printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
  42                        smp_processor_id());
  43        add_taint(TAINT_MACHINE_CHECK);
  44}
  45
  46/* P4/Xeon Thermal transition interrupt handler */
  47static void intel_thermal_interrupt(struct pt_regs *regs)
  48{
  49        __u64 msr_val;
  50
  51        ack_APIC_irq();
  52
  53        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
  54        therm_throt_process(msr_val & 0x1);
  55}
  56
  57/* Thermal interrupt handler for this CPU setup */
  58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
  59
  60void smp_thermal_interrupt(struct pt_regs *regs)
  61{
  62        irq_enter();
  63        vendor_thermal_interrupt(regs);
  64        __get_cpu_var(irq_stat).irq_thermal_count++;
  65        irq_exit();
  66}
  67
  68/* P4/Xeon Thermal regulation detect and init */
  69static void intel_init_thermal(struct cpuinfo_x86 *c)
  70{
  71        u32 l, h;
  72        unsigned int cpu = smp_processor_id();
  73
  74        /* Thermal monitoring */
  75        if (!cpu_has(c, X86_FEATURE_ACPI))
  76                return; /* -ENODEV */
  77
  78        /* Clock modulation */
  79        if (!cpu_has(c, X86_FEATURE_ACC))
  80                return; /* -ENODEV */
  81
  82        /* first check if its enabled already, in which case there might
  83         * be some SMM goo which handles it, so we can't even put a handler
  84         * since it might be delivered via SMI already -zwanem.
  85         */
  86        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
  87        h = apic_read(APIC_LVTTHMR);
  88        if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
  89                printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
  90                                cpu);
  91                return; /* -EBUSY */
  92        }
  93
  94        /* check whether a vector already exists, temporarily masked? */
  95        if (h & APIC_VECTOR_MASK) {
  96                printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
  97                                "installed\n",
  98                        cpu, (h & APIC_VECTOR_MASK));
  99                return; /* -EBUSY */
 100        }
 101
 102        /* The temperature transition interrupt handler setup */
 103        h = THERMAL_APIC_VECTOR;                /* our delivery vector */
 104        h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
 105        apic_write(APIC_LVTTHMR, h);
 106
 107        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
 108        wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
 109
 110        /* ok we're good to go... */
 111        vendor_thermal_interrupt = intel_thermal_interrupt;
 112
 113        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 114        wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
 115
 116        l = apic_read(APIC_LVTTHMR);
 117        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 118        printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
 119
 120        /* enable thermal throttle processing */
 121        atomic_set(&therm_throt_en, 1);
 122        return;
 123}
 124#endif /* CONFIG_X86_MCE_P4THERMAL */
 125
 126
 127/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
 128static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 129{
 130        u32 h;
 131
 132        rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
 133        rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
 134        rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
 135        rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
 136        rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
 137        rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
 138        rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
 139        rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
 140        rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
 141        rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
 142}
 143
 144static void intel_machine_check(struct pt_regs *regs, long error_code)
 145{
 146        int recover = 1;
 147        u32 alow, ahigh, high, low;
 148        u32 mcgstl, mcgsth;
 149        int i;
 150
 151        rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 152        if (mcgstl & (1<<0))    /* Recoverable ? */
 153                recover = 0;
 154
 155        printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 156                smp_processor_id(), mcgsth, mcgstl);
 157
 158        if (mce_num_extended_msrs > 0) {
 159                struct intel_mce_extended_msrs dbg;
 160                intel_get_extended_msrs(&dbg);
 161                printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 162                        "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 163                        "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
 164                        smp_processor_id(), dbg.eip, dbg.eflags,
 165                        dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
 166                        dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
 167        }
 168
 169        for (i = 0; i < nr_mce_banks; i++) {
 170                rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 171                if (high & (1<<31)) {
 172                        char misc[20];
 173                        char addr[24];
 174                        misc[0] = addr[0] = '\0';
 175                        if (high & (1<<29))
 176                                recover |= 1;
 177                        if (high & (1<<25))
 178                                recover |= 2;
 179                        high &= ~(1<<31);
 180                        if (high & (1<<27)) {
 181                                rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 182                                snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
 183                        }
 184                        if (high & (1<<26)) {
 185                                rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 186                                snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 187                        }
 188                        printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 189                                smp_processor_id(), i, high, low, misc, addr);
 190                }
 191        }
 192
 193        if (recover & 2)
 194                panic("CPU context corrupt");
 195        if (recover & 1)
 196                panic("Unable to continue");
 197
 198        printk(KERN_EMERG "Attempting to continue.\n");
 199        /*
 200         * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 201         * recoverable/continuable.This will allow BIOS to look at the MSRs
 202         * for errors if the OS could not log the error.
 203         */
 204        for (i = 0; i < nr_mce_banks; i++) {
 205                u32 msr;
 206                msr = MSR_IA32_MC0_STATUS+i*4;
 207                rdmsr(msr, low, high);
 208                if (high&(1<<31)) {
 209                        /* Clear it */
 210                        wrmsr(msr, 0UL, 0UL);
 211                        /* Serialize */
 212                        wmb();
 213                        add_taint(TAINT_MACHINE_CHECK);
 214                }
 215        }
 216        mcgstl &= ~(1<<2);
 217        wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 218}
 219
 220
 221void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 222{
 223        u32 l, h;
 224        int i;
 225
 226        machine_check_vector = intel_machine_check;
 227        wmb();
 228
 229        printk(KERN_INFO "Intel machine check architecture supported.\n");
 230        rdmsr(MSR_IA32_MCG_CAP, l, h);
 231        if (l & (1<<8)) /* Control register present ? */
 232                wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 233        nr_mce_banks = l & 0xff;
 234
 235        for (i = 0; i < nr_mce_banks; i++) {
 236                wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 237                wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
 238        }
 239
 240        set_in_cr4(X86_CR4_MCE);
 241        printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
 242                smp_processor_id());
 243
 244        /* Check for P4/Xeon extended MCE MSRs */
 245        rdmsr(MSR_IA32_MCG_CAP, l, h);
 246        if (l & (1<<9)) {/* MCG_EXT_P */
 247                mce_num_extended_msrs = (l >> 16) & 0xff;
 248                printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
 249                                " available\n",
 250                        smp_processor_id(), mce_num_extended_msrs);
 251
 252#ifdef CONFIG_X86_MCE_P4THERMAL
 253                /* Check for P4/Xeon Thermal monitor */
 254                intel_init_thermal(c);
 255#endif
 256        }
 257}
 258
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.