linux/drivers/edac/amd64_edac.c
<<
>>
Prefs
   1#include "amd64_edac.h"
   2#include <asm/k8.h>
   3
   4static struct edac_pci_ctl_info *amd64_ctl_pci;
   5
   6static int report_gart_errors;
   7module_param(report_gart_errors, int, 0644);
   8
   9/*
  10 * Set by command line parameter. If BIOS has enabled the ECC, this override is
  11 * cleared to prevent re-enabling the hardware by this driver.
  12 */
  13static int ecc_enable_override;
  14module_param(ecc_enable_override, int, 0644);
  15
  16/* Lookup table for all possible MC control instances */
  17struct amd64_pvt;
  18static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
  19static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES];
  20
  21/*
  22 * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
  23 * for DDR2 DRAM mapping.
  24 */
  25u32 revf_quad_ddr2_shift[] = {
  26        0,      /* 0000b NULL DIMM (128mb) */
  27        28,     /* 0001b 256mb */
  28        29,     /* 0010b 512mb */
  29        29,     /* 0011b 512mb */
  30        29,     /* 0100b 512mb */
  31        30,     /* 0101b 1gb */
  32        30,     /* 0110b 1gb */
  33        31,     /* 0111b 2gb */
  34        31,     /* 1000b 2gb */
  35        32,     /* 1001b 4gb */
  36        32,     /* 1010b 4gb */
  37        33,     /* 1011b 8gb */
  38        0,      /* 1100b future */
  39        0,      /* 1101b future */
  40        0,      /* 1110b future */
  41        0       /* 1111b future */
  42};
  43
  44/*
  45 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
  46 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
  47 * or higher value'.
  48 *
  49 *FIXME: Produce a better mapping/linearisation.
  50 */
  51
  52struct scrubrate scrubrates[] = {
  53        { 0x01, 1600000000UL},
  54        { 0x02, 800000000UL},
  55        { 0x03, 400000000UL},
  56        { 0x04, 200000000UL},
  57        { 0x05, 100000000UL},
  58        { 0x06, 50000000UL},
  59        { 0x07, 25000000UL},
  60        { 0x08, 12284069UL},
  61        { 0x09, 6274509UL},
  62        { 0x0A, 3121951UL},
  63        { 0x0B, 1560975UL},
  64        { 0x0C, 781440UL},
  65        { 0x0D, 390720UL},
  66        { 0x0E, 195300UL},
  67        { 0x0F, 97650UL},
  68        { 0x10, 48854UL},
  69        { 0x11, 24427UL},
  70        { 0x12, 12213UL},
  71        { 0x13, 6101UL},
  72        { 0x14, 3051UL},
  73        { 0x15, 1523UL},
  74        { 0x16, 761UL},
  75        { 0x00, 0UL},        /* scrubbing off */
  76};
  77
  78/*
  79 * Memory scrubber control interface. For K8, memory scrubbing is handled by
  80 * hardware and can involve L2 cache, dcache as well as the main memory. With
  81 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
  82 * functionality.
  83 *
  84 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
  85 * (dram) over to cache lines. This is nasty, so we will use bandwidth in
  86 * bytes/sec for the setting.
  87 *
  88 * Currently, we only do dram scrubbing. If the scrubbing is done in software on
  89 * other archs, we might not have access to the caches directly.
  90 */
  91
  92/*
  93 * scan the scrub rate mapping table for a close or matching bandwidth value to
  94 * issue. If requested is too big, then use last maximum value found.
  95 */
  96static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
  97                                       u32 min_scrubrate)
  98{
  99        u32 scrubval;
 100        int i;
 101
 102        /*
 103         * map the configured rate (new_bw) to a value specific to the AMD64
 104         * memory controller and apply to register. Search for the first
 105         * bandwidth entry that is greater or equal than the setting requested
 106         * and program that. If at last entry, turn off DRAM scrubbing.
 107         */
 108        for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
 109                /*
 110                 * skip scrub rates which aren't recommended
 111                 * (see F10 BKDG, F3x58)
 112                 */
 113                if (scrubrates[i].scrubval < min_scrubrate)
 114                        continue;
 115
 116                if (scrubrates[i].bandwidth <= new_bw)
 117                        break;
 118
 119                /*
 120                 * if no suitable bandwidth found, turn off DRAM scrubbing
 121                 * entirely by falling back to the last element in the
 122                 * scrubrates array.
 123                 */
 124        }
 125
 126        scrubval = scrubrates[i].scrubval;
 127        if (scrubval)
 128                edac_printk(KERN_DEBUG, EDAC_MC,
 129                            "Setting scrub rate bandwidth: %u\n",
 130                            scrubrates[i].bandwidth);
 131        else
 132                edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n");
 133
 134        pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
 135
 136        return 0;
 137}
 138
 139static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)
 140{
 141        struct amd64_pvt *pvt = mci->pvt_info;
 142        u32 min_scrubrate = 0x0;
 143
 144        switch (boot_cpu_data.x86) {
 145        case 0xf:
 146                min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
 147                break;
 148        case 0x10:
 149                min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
 150                break;
 151        case 0x11:
 152                min_scrubrate = F11_MIN_SCRUB_RATE_BITS;
 153                break;
 154
 155        default:
 156                amd64_printk(KERN_ERR, "Unsupported family!\n");
 157                break;
 158        }
 159        return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth,
 160                        min_scrubrate);
 161}
 162
 163static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
 164{
 165        struct amd64_pvt *pvt = mci->pvt_info;
 166        u32 scrubval = 0;
 167        int status = -1, i, ret = 0;
 168
 169        ret = pci_read_config_dword(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval);
 170        if (ret)
 171                debugf0("Reading K8_SCRCTRL failed\n");
 172
 173        scrubval = scrubval & 0x001F;
 174
 175        edac_printk(KERN_DEBUG, EDAC_MC,
 176                    "pci-read, sdram scrub control value: %d \n", scrubval);
 177
 178        for (i = 0; ARRAY_SIZE(scrubrates); i++) {
 179                if (scrubrates[i].scrubval == scrubval) {
 180                        *bw = scrubrates[i].bandwidth;
 181                        status = 0;
 182                        break;
 183                }
 184        }
 185
 186        return status;
 187}
 188
 189/* Map from a CSROW entry to the mask entry that operates on it */
 190static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
 191{
 192        if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F)
 193                return csrow;
 194        else
 195                return csrow >> 1;
 196}
 197
 198/* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
 199static u32 amd64_get_dct_base(struct amd64_pvt *pvt, int dct, int csrow)
 200{
 201        if (dct == 0)
 202                return pvt->dcsb0[csrow];
 203        else
 204                return pvt->dcsb1[csrow];
 205}
 206
 207/*
 208 * Return the 'mask' address the i'th CS entry. This function is needed because
 209 * there number of DCSM registers on Rev E and prior vs Rev F and later is
 210 * different.
 211 */
 212static u32 amd64_get_dct_mask(struct amd64_pvt *pvt, int dct, int csrow)
 213{
 214        if (dct == 0)
 215                return pvt->dcsm0[amd64_map_to_dcs_mask(pvt, csrow)];
 216        else
 217                return pvt->dcsm1[amd64_map_to_dcs_mask(pvt, csrow)];
 218}
 219
 220
 221/*
 222 * In *base and *limit, pass back the full 40-bit base and limit physical
 223 * addresses for the node given by node_id.  This information is obtained from
 224 * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
 225 * base and limit addresses are of type SysAddr, as defined at the start of
 226 * section 3.4.4 (p. 70).  They are the lowest and highest physical addresses
 227 * in the address range they represent.
 228 */
 229static void amd64_get_base_and_limit(struct amd64_pvt *pvt, int node_id,
 230                               u64 *base, u64 *limit)
 231{
 232        *base = pvt->dram_base[node_id];
 233        *limit = pvt->dram_limit[node_id];
 234}
 235
 236/*
 237 * Return 1 if the SysAddr given by sys_addr matches the base/limit associated
 238 * with node_id
 239 */
 240static int amd64_base_limit_match(struct amd64_pvt *pvt,
 241                                        u64 sys_addr, int node_id)
 242{
 243        u64 base, limit, addr;
 244
 245        amd64_get_base_and_limit(pvt, node_id, &base, &limit);
 246
 247        /* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
 248         * all ones if the most significant implemented address bit is 1.
 249         * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
 250         * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
 251         * Application Programming.
 252         */
 253        addr = sys_addr & 0x000000ffffffffffull;
 254
 255        return (addr >= base) && (addr <= limit);
 256}
 257
 258/*
 259 * Attempt to map a SysAddr to a node. On success, return a pointer to the
 260 * mem_ctl_info structure for the node that the SysAddr maps to.
 261 *
 262 * On failure, return NULL.
 263 */
 264static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
 265                                                u64 sys_addr)
 266{
 267        struct amd64_pvt *pvt;
 268        int node_id;
 269        u32 intlv_en, bits;
 270
 271        /*
 272         * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
 273         * 3.4.4.2) registers to map the SysAddr to a node ID.
 274         */
 275        pvt = mci->pvt_info;
 276
 277        /*
 278         * The value of this field should be the same for all DRAM Base
 279         * registers.  Therefore we arbitrarily choose to read it from the
 280         * register for node 0.
 281         */
 282        intlv_en = pvt->dram_IntlvEn[0];
 283
 284        if (intlv_en == 0) {
 285                for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) {
 286                        if (amd64_base_limit_match(pvt, sys_addr, node_id))
 287                                goto found;
 288                }
 289                goto err_no_match;
 290        }
 291
 292        if (unlikely((intlv_en != 0x01) &&
 293                     (intlv_en != 0x03) &&
 294                     (intlv_en != 0x07))) {
 295                amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
 296                             "IntlvEn field of DRAM Base Register for node 0: "
 297                             "this probably indicates a BIOS bug.\n", intlv_en);
 298                return NULL;
 299        }
 300
 301        bits = (((u32) sys_addr) >> 12) & intlv_en;
 302
 303        for (node_id = 0; ; ) {
 304                if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits)
 305                        break;  /* intlv_sel field matches */
 306
 307                if (++node_id >= DRAM_REG_COUNT)
 308                        goto err_no_match;
 309        }
 310
 311        /* sanity test for sys_addr */
 312        if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
 313                amd64_printk(KERN_WARNING,
 314                             "%s(): sys_addr 0x%llx falls outside base/limit "
 315                             "address range for node %d with node interleaving "
 316                             "enabled.\n",
 317                             __func__, sys_addr, node_id);
 318                return NULL;
 319        }
 320
 321found:
 322        return edac_mc_find(node_id);
 323
 324err_no_match:
 325        debugf2("sys_addr 0x%lx doesn't match any node\n",
 326                (unsigned long)sys_addr);
 327
 328        return NULL;
 329}
 330
 331/*
 332 * Extract the DRAM CS base address from selected csrow register.
 333 */
 334static u64 base_from_dct_base(struct amd64_pvt *pvt, int csrow)
 335{
 336        return ((u64) (amd64_get_dct_base(pvt, 0, csrow) & pvt->dcsb_base)) <<
 337                                pvt->dcs_shift;
 338}
 339
 340/*
 341 * Extract the mask from the dcsb0[csrow] entry in a CPU revision-specific way.
 342 */
 343static u64 mask_from_dct_mask(struct amd64_pvt *pvt, int csrow)
 344{
 345        u64 dcsm_bits, other_bits;
 346        u64 mask;
 347
 348        /* Extract bits from DRAM CS Mask. */
 349        dcsm_bits = amd64_get_dct_mask(pvt, 0, csrow) & pvt->dcsm_mask;
 350
 351        other_bits = pvt->dcsm_mask;
 352        other_bits = ~(other_bits << pvt->dcs_shift);
 353
 354        /*
 355         * The extracted bits from DCSM belong in the spaces represented by
 356         * the cleared bits in other_bits.
 357         */
 358        mask = (dcsm_bits << pvt->dcs_shift) | other_bits;
 359
 360        return mask;
 361}
 362
 363/*
 364 * @input_addr is an InputAddr associated with the node given by mci. Return the
 365 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
 366 */
 367static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
 368{
 369        struct amd64_pvt *pvt;
 370        int csrow;
 371        u64 base, mask;
 372
 373        pvt = mci->pvt_info;
 374
 375        /*
 376         * Here we use the DRAM CS Base and DRAM CS Mask registers. For each CS
 377         * base/mask register pair, test the condition shown near the start of
 378         * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
 379         */
 380        for (csrow = 0; csrow < pvt->cs_count; csrow++) {
 381
 382                /* This DRAM chip select is disabled on this node */
 383                if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
 384                        continue;
 385
 386                base = base_from_dct_base(pvt, csrow);
 387                mask = ~mask_from_dct_mask(pvt, csrow);
 388
 389                if ((input_addr & mask) == (base & mask)) {
 390                        debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n",
 391                                (unsigned long)input_addr, csrow,
 392                                pvt->mc_node_id);
 393
 394                        return csrow;
 395                }
 396        }
 397
 398        debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n",
 399                (unsigned long)input_addr, pvt->mc_node_id);
 400
 401        return -1;
 402}
 403
 404/*
 405 * Return the base value defined by the DRAM Base register for the node
 406 * represented by mci.  This function returns the full 40-bit value despite the
 407 * fact that the register only stores bits 39-24 of the value. See section
 408 * 3.4.4.1 (BKDG #26094, K8, revA-E)
 409 */
 410static inline u64 get_dram_base(struct mem_ctl_info *mci)
 411{
 412        struct amd64_pvt *pvt = mci->pvt_info;
 413
 414        return pvt->dram_base[pvt->mc_node_id];
 415}
 416
 417/*
 418 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
 419 * for the node represented by mci. Info is passed back in *hole_base,
 420 * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
 421 * info is invalid. Info may be invalid for either of the following reasons:
 422 *
 423 * - The revision of the node is not E or greater.  In this case, the DRAM Hole
 424 *   Address Register does not exist.
 425 *
 426 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
 427 *   indicating that its contents are not valid.
 428 *
 429 * The values passed back in *hole_base, *hole_offset, and *hole_size are
 430 * complete 32-bit values despite the fact that the bitfields in the DHAR
 431 * only represent bits 31-24 of the base and offset values.
 432 */
 433int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 434                             u64 *hole_offset, u64 *hole_size)
 435{
 436        struct amd64_pvt *pvt = mci->pvt_info;
 437        u64 base;
 438
 439        /* only revE and later have the DRAM Hole Address Register */
 440        if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_E) {
 441                debugf1("  revision %d for node %d does not support DHAR\n",
 442                        pvt->ext_model, pvt->mc_node_id);
 443                return 1;
 444        }
 445
 446        /* only valid for Fam10h */
 447        if (boot_cpu_data.x86 == 0x10 &&
 448            (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) == 0) {
 449                debugf1("  Dram Memory Hoisting is DISABLED on this system\n");
 450                return 1;
 451        }
 452
 453        if ((pvt->dhar & DHAR_VALID) == 0) {
 454                debugf1("  Dram Memory Hoisting is DISABLED on this node %d\n",
 455                        pvt->mc_node_id);
 456                return 1;
 457        }
 458
 459        /* This node has Memory Hoisting */
 460
 461        /* +------------------+--------------------+--------------------+-----
 462         * | memory           | DRAM hole          | relocated          |
 463         * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
 464         * |                  |                    | DRAM hole          |
 465         * |                  |                    | [0x100000000,      |
 466         * |                  |                    |  (0x100000000+     |
 467         * |                  |                    |   (0xffffffff-x))] |
 468         * +------------------+--------------------+--------------------+-----
 469         *
 470         * Above is a diagram of physical memory showing the DRAM hole and the
 471         * relocated addresses from the DRAM hole.  As shown, the DRAM hole
 472         * starts at address x (the base address) and extends through address
 473         * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
 474         * addresses in the hole so that they start at 0x100000000.
 475         */
 476
 477        base = dhar_base(pvt->dhar);
 478
 479        *hole_base = base;
 480        *hole_size = (0x1ull << 32) - base;
 481
 482        if (boot_cpu_data.x86 > 0xf)
 483                *hole_offset = f10_dhar_offset(pvt->dhar);
 484        else
 485                *hole_offset = k8_dhar_offset(pvt->dhar);
 486
 487        debugf1("  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
 488                pvt->mc_node_id, (unsigned long)*hole_base,
 489                (unsigned long)*hole_offset, (unsigned long)*hole_size);
 490
 491        return 0;
 492}
 493EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
 494
 495/*
 496 * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
 497 * assumed that sys_addr maps to the node given by mci.
 498 *
 499 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
 500 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
 501 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
 502 * then it is also involved in translating a SysAddr to a DramAddr. Sections
 503 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
 504 * These parts of the documentation are unclear. I interpret them as follows:
 505 *
 506 * When node n receives a SysAddr, it processes the SysAddr as follows:
 507 *
 508 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
 509 *    Limit registers for node n. If the SysAddr is not within the range
 510 *    specified by the base and limit values, then node n ignores the Sysaddr
 511 *    (since it does not map to node n). Otherwise continue to step 2 below.
 512 *
 513 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
 514 *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
 515 *    the range of relocated addresses (starting at 0x100000000) from the DRAM
 516 *    hole. If not, skip to step 3 below. Else get the value of the
 517 *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
 518 *    offset defined by this value from the SysAddr.
 519 *
 520 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
 521 *    Base register for node n. To obtain the DramAddr, subtract the base
 522 *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
 523 */
 524static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
 525{
 526        u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
 527        int ret = 0;
 528
 529        dram_base = get_dram_base(mci);
 530
 531        ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
 532                                      &hole_size);
 533        if (!ret) {
 534                if ((sys_addr >= (1ull << 32)) &&
 535                    (sys_addr < ((1ull << 32) + hole_size))) {
 536                        /* use DHAR to translate SysAddr to DramAddr */
 537                        dram_addr = sys_addr - hole_offset;
 538
 539                        debugf2("using DHAR to translate SysAddr 0x%lx to "
 540                                "DramAddr 0x%lx\n",
 541                                (unsigned long)sys_addr,
 542                                (unsigned long)dram_addr);
 543
 544                        return dram_addr;
 545                }
 546        }
 547
 548        /*
 549         * Translate the SysAddr to a DramAddr as shown near the start of
 550         * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
 551         * only deals with 40-bit values.  Therefore we discard bits 63-40 of
 552         * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
 553         * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
 554         * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
 555         * Programmer's Manual Volume 1 Application Programming.
 556         */
 557        dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
 558
 559        debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
 560                "DramAddr 0x%lx\n", (unsigned long)sys_addr,
 561                (unsigned long)dram_addr);
 562        return dram_addr;
 563}
 564
 /*
 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
 * (section 3.4.4.1). Return the number of SysAddr bits used for node
 * interleaving: 1, 2 or 3 for the values 1, 3 and 7, and 0 for every other
 * value up to 7. Values above 7 trigger BUG_ON().
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	BUG_ON(intlv_en > 7);

	switch (intlv_en) {
	case 1:
		return 1;
	case 3:
		return 2;
	case 7:
		return 3;
	default:
		return 0;
	}
}
 579
 580/* Translate the DramAddr given by @dram_addr to an InputAddr. */
 581static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
 582{
 583        struct amd64_pvt *pvt;
 584        int intlv_shift;
 585        u64 input_addr;
 586
 587        pvt = mci->pvt_info;
 588
 589        /*
 590         * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
 591         * concerning translating a DramAddr to an InputAddr.
 592         */
 593        intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
 594        input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
 595            (dram_addr & 0xfff);
 596
 597        debugf2("  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
 598                intlv_shift, (unsigned long)dram_addr,
 599                (unsigned long)input_addr);
 600
 601        return input_addr;
 602}
 603
 604/*
 605 * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
 606 * assumed that @sys_addr maps to the node given by mci.
 607 */
 608static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
 609{
 610        u64 input_addr;
 611
 612        input_addr =
 613            dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
 614
 615        debugf2("SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
 616                (unsigned long)sys_addr, (unsigned long)input_addr);
 617
 618        return input_addr;
 619}
 620
 621
 622/*
 623 * @input_addr is an InputAddr associated with the node represented by mci.
 624 * Translate @input_addr to a DramAddr and return the result.
 625 */
 626static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
 627{
 628        struct amd64_pvt *pvt;
 629        int node_id, intlv_shift;
 630        u64 bits, dram_addr;
 631        u32 intlv_sel;
 632
 633        /*
 634         * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
 635         * shows how to translate a DramAddr to an InputAddr. Here we reverse
 636         * this procedure. When translating from a DramAddr to an InputAddr, the
 637         * bits used for node interleaving are discarded.  Here we recover these
 638         * bits from the IntlvSel field of the DRAM Limit register (section
 639         * 3.4.4.2) for the node that input_addr is associated with.
 640         */
 641        pvt = mci->pvt_info;
 642        node_id = pvt->mc_node_id;
 643        BUG_ON((node_id < 0) || (node_id > 7));
 644
 645        intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
 646
 647        if (intlv_shift == 0) {
 648                debugf1("    InputAddr 0x%lx translates to DramAddr of "
 649                        "same value\n", (unsigned long)input_addr);
 650
 651                return input_addr;
 652        }
 653
 654        bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
 655            (input_addr & 0xfff);
 656
 657        intlv_sel = pvt->dram_IntlvSel[node_id] & ((1 << intlv_shift) - 1);
 658        dram_addr = bits + (intlv_sel << 12);
 659
 660        debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
 661                "(%d node interleave bits)\n", (unsigned long)input_addr,
 662                (unsigned long)dram_addr, intlv_shift);
 663
 664        return dram_addr;
 665}
 666
 667/*
 668 * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
 669 * @dram_addr to a SysAddr.
 670 */
 671static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
 672{
 673        struct amd64_pvt *pvt = mci->pvt_info;
 674        u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
 675        int ret = 0;
 676
 677        ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
 678                                      &hole_size);
 679        if (!ret) {
 680                if ((dram_addr >= hole_base) &&
 681                    (dram_addr < (hole_base + hole_size))) {
 682                        sys_addr = dram_addr + hole_offset;
 683
 684                        debugf1("using DHAR to translate DramAddr 0x%lx to "
 685                                "SysAddr 0x%lx\n", (unsigned long)dram_addr,
 686                                (unsigned long)sys_addr);
 687
 688                        return sys_addr;
 689                }
 690        }
 691
 692        amd64_get_base_and_limit(pvt, pvt->mc_node_id, &base, &limit);
 693        sys_addr = dram_addr + base;
 694
 695        /*
 696         * The sys_addr we have computed up to this point is a 40-bit value
 697         * because the k8 deals with 40-bit values.  However, the value we are
 698         * supposed to return is a full 64-bit physical address.  The AMD
 699         * x86-64 architecture specifies that the most significant implemented
 700         * address bit through bit 63 of a physical address must be either all
 701         * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
 702         * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
 703         * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
 704         * Programming.
 705         */
 706        sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
 707
 708        debugf1("    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
 709                pvt->mc_node_id, (unsigned long)dram_addr,
 710                (unsigned long)sys_addr);
 711
 712        return sys_addr;
 713}
 714
 715/*
 716 * @input_addr is an InputAddr associated with the node given by mci. Translate
 717 * @input_addr to a SysAddr.
 718 */
 719static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
 720                                         u64 input_addr)
 721{
 722        return dram_addr_to_sys_addr(mci,
 723                                     input_addr_to_dram_addr(mci, input_addr));
 724}
 725
 726/*
 727 * Find the minimum and maximum InputAddr values that map to the given @csrow.
 728 * Pass back these values in *input_addr_min and *input_addr_max.
 729 */
 730static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
 731                              u64 *input_addr_min, u64 *input_addr_max)
 732{
 733        struct amd64_pvt *pvt;
 734        u64 base, mask;
 735
 736        pvt = mci->pvt_info;
 737        BUG_ON((csrow < 0) || (csrow >= pvt->cs_count));
 738
 739        base = base_from_dct_base(pvt, csrow);
 740        mask = mask_from_dct_mask(pvt, csrow);
 741
 742        *input_addr_min = base & ~mask;
 743        *input_addr_max = base | mask | pvt->dcs_mask_notused;
 744}
 745
 746/*
 747 * Extract error address from MCA NB Address Low (section 3.6.4.5) and MCA NB
 748 * Address High (section 3.6.4.6) register values and return the result. Address
 749 * is located in the info structure (nbeah and nbeal), the encoding is device
 750 * specific.
 751 */
 752static u64 extract_error_address(struct mem_ctl_info *mci,
 753                                 struct err_regs *info)
 754{
 755        struct amd64_pvt *pvt = mci->pvt_info;
 756
 757        return pvt->ops->get_error_address(mci, info);
 758}
 759
 760
 761/* Map the Error address to a PAGE and PAGE OFFSET. */
 762static inline void error_address_to_page_and_offset(u64 error_address,
 763                                                    u32 *page, u32 *offset)
 764{
 765        *page = (u32) (error_address >> PAGE_SHIFT);
 766        *offset = ((u32) error_address) & ~PAGE_MASK;
 767}
 768
 769/*
 770 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
 771 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
 772 * of a node that detected an ECC memory error.  mci represents the node that
 773 * the error address maps to (possibly different from the node that detected
 774 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
 775 * error.
 776 */
 777static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
 778{
 779        int csrow;
 780
 781        csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
 782
 783        if (csrow == -1)
 784                amd64_mc_printk(mci, KERN_ERR,
 785                             "Failed to translate InputAddr to csrow for "
 786                             "address 0x%lx\n", (unsigned long)sys_addr);
 787        return csrow;
 788}
 789
 790static int get_channel_from_ecc_syndrome(unsigned short syndrome);
 791
 792static void amd64_cpu_display_info(struct amd64_pvt *pvt)
 793{
 794        if (boot_cpu_data.x86 == 0x11)
 795                edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n");
 796        else if (boot_cpu_data.x86 == 0x10)
 797                edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n");
 798        else if (boot_cpu_data.x86 == 0xf)
 799                edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n",
 800                        (pvt->ext_model >= OPTERON_CPU_REV_F) ?
 801                        "Rev F or later" : "Rev E or earlier");
 802        else
 803                /* we'll hardly ever ever get here */
 804                edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n");
 805}
 806
 807/*
 808 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
 809 * are ECC capable.
 810 */
 811static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
 812{
 813        int bit;
 814        enum dev_type edac_cap = EDAC_FLAG_NONE;
 815
 816        bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F)
 817                ? 19
 818                : 17;
 819
 820        if (pvt->dclr0 & BIT(bit))
 821                edac_cap = EDAC_FLAG_SECDED;
 822
 823        return edac_cap;
 824}
 825
 826
 827static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
 828                                         int ganged);
 829
 830/* Display and decode various NB registers for debug purposes. */
 831static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
 832{
 833        int ganged;
 834
 835        debugf1("  nbcap:0x%8.08x DctDualCap=%s DualNode=%s 8-Node=%s\n",
 836                pvt->nbcap,
 837                (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "True" : "False",
 838                (pvt->nbcap & K8_NBCAP_DUAL_NODE) ? "True" : "False",
 839                (pvt->nbcap & K8_NBCAP_8_NODE) ? "True" : "False");
 840        debugf1("    ECC Capable=%s   ChipKill Capable=%s\n",
 841                (pvt->nbcap & K8_NBCAP_SECDED) ? "True" : "False",
 842                (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "True" : "False");
 843        debugf1("  DramCfg0-low=0x%08x DIMM-ECC=%s Parity=%s Width=%s\n",
 844                pvt->dclr0,
 845                (pvt->dclr0 & BIT(19)) ?  "Enabled" : "Disabled",
 846                (pvt->dclr0 & BIT(8)) ?  "Enabled" : "Disabled",
 847                (pvt->dclr0 & BIT(11)) ?  "128b" : "64b");
 848        debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  DIMM Type=%s\n",
 849                (pvt->dclr0 & BIT(12)) ?  "Y" : "N",
 850                (pvt->dclr0 & BIT(13)) ?  "Y" : "N",
 851                (pvt->dclr0 & BIT(14)) ?  "Y" : "N",
 852                (pvt->dclr0 & BIT(15)) ?  "Y" : "N",
 853                (pvt->dclr0 & BIT(16)) ?  "UN-Buffered" : "Buffered");
 854
 855
 856        debugf1("  online-spare: 0x%8.08x\n", pvt->online_spare);
 857
 858        if (boot_cpu_data.x86 == 0xf) {
 859                debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
 860                        pvt->dhar, dhar_base(pvt->dhar),
 861                        k8_dhar_offset(pvt->dhar));
 862                debugf1("      DramHoleValid=%s\n",
 863                        (pvt->dhar & DHAR_VALID) ?  "True" : "False");
 864
 865                debugf1("  dbam-dkt: 0x%8.08x\n", pvt->dbam0);
 866
 867                /* everything below this point is Fam10h and above */
 868                return;
 869
 870        } else {
 871                debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
 872                        pvt->dhar, dhar_base(pvt->dhar),
 873                        f10_dhar_offset(pvt->dhar));
 874                debugf1("    DramMemHoistValid=%s DramHoleValid=%s\n",
 875                        (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) ?
 876                        "True" : "False",
 877                        (pvt->dhar & DHAR_VALID) ?
 878                        "True" : "False");
 879        }
 880
 881        /* Only if NOT ganged does dcl1 have valid info */
 882        if (!dct_ganging_enabled(pvt)) {
 883                debugf1("  DramCfg1-low=0x%08x DIMM-ECC=%s Parity=%s "
 884                        "Width=%s\n", pvt->dclr1,
 885                        (pvt->dclr1 & BIT(19)) ?  "Enabled" : "Disabled",
 886                        (pvt->dclr1 & BIT(8)) ?  "Enabled" : "Disabled",
 887                        (pvt->dclr1 & BIT(11)) ?  "128b" : "64b");
 888                debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  "
 889                        "DIMM Type=%s\n",
 890                        (pvt->dclr1 & BIT(12)) ?  "Y" : "N",
 891                        (pvt->dclr1 & BIT(13)) ?  "Y" : "N",
 892                        (pvt->dclr1 & BIT(14)) ?  "Y" : "N",
 893                        (pvt->dclr1 & BIT(15)) ?  "Y" : "N",
 894                        (pvt->dclr1 & BIT(16)) ?  "UN-Buffered" : "Buffered");
 895        }
 896
 897        /*
 898         * Determine if ganged and then dump memory sizes for first controller,
 899         * and if NOT ganged dump info for 2nd controller.
 900         */
 901        ganged = dct_ganging_enabled(pvt);
 902
 903        f10_debug_display_dimm_sizes(0, pvt, ganged);
 904
 905        if (!ganged)
 906                f10_debug_display_dimm_sizes(1, pvt, ganged);
 907}
 908
 909/* Read in both of DBAM registers */
 910static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
 911{
 912        int err = 0;
 913        unsigned int reg;
 914
 915        reg = DBAM0;
 916        err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam0);
 917        if (err)
 918                goto err_reg;
 919
 920        if (boot_cpu_data.x86 >= 0x10) {
 921                reg = DBAM1;
 922                err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam1);
 923
 924                if (err)
 925                        goto err_reg;
 926        }
 927
 928        return;
 929
 930err_reg:
 931        debugf0("Error reading F2x%03x.\n", reg);
 932}
 933
 934/*
 935 * NOTE: CPU Revision Dependent code: Rev E and Rev F
 936 *
 937 * Set the DCSB and DCSM mask values depending on the CPU revision value. Also
 938 * set the shift factor for the DCSB and DCSM values.
 939 *
 940 * ->dcs_mask_notused, RevE:
 941 *
 942 * To find the max InputAddr for the csrow, start with the base address and set
 943 * all bits that are "don't care" bits in the test at the start of section
 944 * 3.5.4 (p. 84).
 945 *
 946 * The "don't care" bits are all set bits in the mask and all bits in the gaps
 947 * between bit ranges [35:25] and [19:13]. The value REV_E_DCS_NOTUSED_BITS
 948 * represents bits [24:20] and [12:0], which are all bits in the above-mentioned
 949 * gaps.
 950 *
 951 * ->dcs_mask_notused, RevF and later:
 952 *
 953 * To find the max InputAddr for the csrow, start with the base address and set
 954 * all bits that are "don't care" bits in the test at the start of NPT section
 955 * 4.5.4 (p. 87).
 956 *
 957 * The "don't care" bits are all set bits in the mask and all bits in the gaps
 958 * between bit ranges [36:27] and [21:13].
 959 *
 960 * The value REV_F_F1Xh_DCS_NOTUSED_BITS represents bits [26:22] and [12:0],
 961 * which are all bits in the above-mentioned gaps.
 962 */
 963static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
 964{
 965
 966        if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) {
 967                pvt->dcsb_base          = REV_E_DCSB_BASE_BITS;
 968                pvt->dcsm_mask          = REV_E_DCSM_MASK_BITS;
 969                pvt->dcs_mask_notused   = REV_E_DCS_NOTUSED_BITS;
 970                pvt->dcs_shift          = REV_E_DCS_SHIFT;
 971                pvt->cs_count           = 8;
 972                pvt->num_dcsm           = 8;
 973        } else {
 974                pvt->dcsb_base          = REV_F_F1Xh_DCSB_BASE_BITS;
 975                pvt->dcsm_mask          = REV_F_F1Xh_DCSM_MASK_BITS;
 976                pvt->dcs_mask_notused   = REV_F_F1Xh_DCS_NOTUSED_BITS;
 977                pvt->dcs_shift          = REV_F_F1Xh_DCS_SHIFT;
 978
 979                if (boot_cpu_data.x86 == 0x11) {
 980                        pvt->cs_count = 4;
 981                        pvt->num_dcsm = 2;
 982                } else {
 983                        pvt->cs_count = 8;
 984                        pvt->num_dcsm = 4;
 985                }
 986        }
 987}
 988
 989/*
 990 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask hw registers
 991 */
 992static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
 993{
 994        int cs, reg, err = 0;
 995
 996        amd64_set_dct_base_and_mask(pvt);
 997
 998        for (cs = 0; cs < pvt->cs_count; cs++) {
 999                reg = K8_DCSB0 + (cs * 4);
1000                err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
1001                                                &pvt->dcsb0[cs]);
1002                if (unlikely(err))
1003                        debugf0("Reading K8_DCSB0[%d] failed\n", cs);
1004                else
1005                        debugf0("  DCSB0[%d]=0x%08x reg: F2x%x\n",
1006                                cs, pvt->dcsb0[cs], reg);
1007
1008                /* If DCT are NOT ganged, then read in DCT1's base */
1009                if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
1010                        reg = F10_DCSB1 + (cs * 4);
1011                        err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
1012                                                        &pvt->dcsb1[cs]);
1013                        if (unlikely(err))
1014                                debugf0("Reading F10_DCSB1[%d] failed\n", cs);
1015                        else
1016                                debugf0("  DCSB1[%d]=0x%08x reg: F2x%x\n",
1017                                        cs, pvt->dcsb1[cs], reg);
1018                } else {
1019                        pvt->dcsb1[cs] = 0;
1020                }
1021        }
1022
1023        for (cs = 0; cs < pvt->num_dcsm; cs++) {
1024                reg = K8_DCSM0 + (cs * 4);
1025                err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
1026                                        &pvt->dcsm0[cs]);
1027                if (unlikely(err))
1028                        debugf0("Reading K8_DCSM0 failed\n");
1029                else
1030                        debugf0("    DCSM0[%d]=0x%08x reg: F2x%x\n",
1031                                cs, pvt->dcsm0[cs], reg);
1032
1033                /* If DCT are NOT ganged, then read in DCT1's mask */
1034                if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
1035                        reg = F10_DCSM1 + (cs * 4);
1036                        err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
1037                                        &pvt->dcsm1[cs]);
1038                        if (unlikely(err))
1039                                debugf0("Reading F10_DCSM1[%d] failed\n", cs);
1040                        else
1041                                debugf0("    DCSM1[%d]=0x%08x reg: F2x%x\n",
1042                                        cs, pvt->dcsm1[cs], reg);
1043                } else
1044                        pvt->dcsm1[cs] = 0;
1045        }
1046}
1047
1048static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
1049{
1050        enum mem_type type;
1051
1052        if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= OPTERON_CPU_REV_F) {
1053                /* Rev F and later */
1054                type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
1055        } else {
1056                /* Rev E and earlier */
1057                type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
1058        }
1059
1060        debugf1("  Memory type is: %s\n",
1061                (type == MEM_DDR2) ? "MEM_DDR2" :
1062                (type == MEM_RDDR2) ? "MEM_RDDR2" :
1063                (type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR");
1064
1065        return type;
1066}
1067
1068/*
1069 * Read the DRAM Configuration Low register. It differs between CG, D & E revs
1070 * and the later RevF memory controllers (DDR vs DDR2)
1071 *
1072 * Return:
1073 *      number of memory channels in operation
1074 * Pass back:
1075 *      contents of the DCL0_LOW register
1076 */
1077static int k8_early_channel_count(struct amd64_pvt *pvt)
1078{
1079        int flag, err = 0;
1080
1081        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
1082        if (err)
1083                return err;
1084
1085        if ((boot_cpu_data.x86_model >> 4) >= OPTERON_CPU_REV_F) {
1086                /* RevF (NPT) and later */
1087                flag = pvt->dclr0 & F10_WIDTH_128;
1088        } else {
1089                /* RevE and earlier */
1090                flag = pvt->dclr0 & REVE_WIDTH_128;
1091        }
1092
1093        /* not used */
1094        pvt->dclr1 = 0;
1095
1096        return (flag) ? 2 : 1;
1097}
1098
1099/* extract the ERROR ADDRESS for the K8 CPUs */
1100static u64 k8_get_error_address(struct mem_ctl_info *mci,
1101                                struct err_regs *info)
1102{
1103        return (((u64) (info->nbeah & 0xff)) << 32) +
1104                        (info->nbeal & ~0x03);
1105}
1106
1107/*
1108 * Read the Base and Limit registers for K8 based Memory controllers; extract
1109 * fields from the 'raw' reg into separate data fields
1110 *
1111 * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN
1112 */
1113static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
1114{
1115        u32 low;
1116        u32 off = dram << 3;    /* 8 bytes between DRAM entries */
1117        int err;
1118
1119        err = pci_read_config_dword(pvt->addr_f1_ctl,
1120                                    K8_DRAM_BASE_LOW + off, &low);
1121        if (err)
1122                debugf0("Reading K8_DRAM_BASE_LOW failed\n");
1123
1124        /* Extract parts into separate data entries */
1125        pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
1126        pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
1127        pvt->dram_rw_en[dram] = (low & 0x3);
1128
1129        err = pci_read_config_dword(pvt->addr_f1_ctl,
1130                                    K8_DRAM_LIMIT_LOW + off, &low);
1131        if (err)
1132                debugf0("Reading K8_DRAM_LIMIT_LOW failed\n");
1133
1134        /*
1135         * Extract parts into separate data entries. Limit is the HIGHEST memory
1136         * location of the region, so lower 24 bits need to be all ones
1137         */
1138        pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
1139        pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
1140        pvt->dram_DstNode[dram] = (low & 0x7);
1141}
1142
1143static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1144                                        struct err_regs *info,
1145                                        u64 SystemAddress)
1146{
1147        struct mem_ctl_info *src_mci;
1148        unsigned short syndrome;
1149        int channel, csrow;
1150        u32 page, offset;
1151
1152        /* Extract the syndrome parts and form a 16-bit syndrome */
1153        syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
1154        syndrome |= LOW_SYNDROME(info->nbsh);
1155
1156        /* CHIPKILL enabled */
1157        if (info->nbcfg & K8_NBCFG_CHIPKILL) {
1158                channel = get_channel_from_ecc_syndrome(syndrome);
1159                if (channel < 0) {
1160                        /*
1161                         * Syndrome didn't map, so we don't know which of the
1162                         * 2 DIMMs is in error. So we need to ID 'both' of them
1163                         * as suspect.
1164                         */
1165                        amd64_mc_printk(mci, KERN_WARNING,
1166                                       "unknown syndrome 0x%x - possible error "
1167                                       "reporting race\n", syndrome);
1168                        edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1169                        return;
1170                }
1171        } else {
1172                /*
1173                 * non-chipkill ecc mode
1174                 *
1175                 * The k8 documentation is unclear about how to determine the
1176                 * channel number when using non-chipkill memory.  This method
1177                 * was obtained from email communication with someone at AMD.
1178                 * (Wish the email was placed in this comment - norsk)
1179                 */
1180                channel = ((SystemAddress & BIT(3)) != 0);
1181        }
1182
1183        /*
1184         * Find out which node the error address belongs to. This may be
1185         * different from the node that detected the error.
1186         */
1187        src_mci = find_mc_by_sys_addr(mci, SystemAddress);
1188        if (!src_mci) {
1189                amd64_mc_printk(mci, KERN_ERR,
1190                             "failed to map error address 0x%lx to a node\n",
1191                             (unsigned long)SystemAddress);
1192                edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1193                return;
1194        }
1195
1196        /* Now map the SystemAddress to a CSROW */
1197        csrow = sys_addr_to_csrow(src_mci, SystemAddress);
1198        if (csrow < 0) {
1199                edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
1200        } else {
1201                error_address_to_page_and_offset(SystemAddress, &page, &offset);
1202
1203                edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
1204                                  channel, EDAC_MOD_STR);
1205        }
1206}
1207
/*
 * Determine the number of PAGES for this DIMM's size based on its DRAM
 * Address Mapping.
 *
 * First step is to calc the number of bits to shift a value of 1 left to
 * indicate how many pages. Start with the DBAM value as the starting bits,
 * then proceed to adjust those shift bits, based on CPU rev and the table.
 * See BKDG on the DBAM
 */
1217static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
1218{
1219        int nr_pages;
1220
1221        if (pvt->ext_model >= OPTERON_CPU_REV_F) {
1222                nr_pages = 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
1223        } else {
1224                /*
1225                 * RevE and less section; this line is tricky. It collapses the
1226                 * table used by RevD and later to one that matches revisions CG
1227                 * and earlier.
1228                 */
1229                dram_map -= (pvt->ext_model >= OPTERON_CPU_REV_D) ?
1230                                (dram_map > 8 ? 4 : (dram_map > 5 ?
1231                                3 : (dram_map > 2 ? 1 : 0))) : 0;
1232
1233                /* 25 shift is 32MiB minimum DIMM size in RevE and prior */
1234                nr_pages = 1 << (dram_map + 25 - PAGE_SHIFT);
1235        }
1236
1237        return nr_pages;
1238}
1239
1240/*
1241 * Get the number of DCT channels in use.
1242 *
1243 * Return:
1244 *      number of Memory Channels in operation
1245 * Pass back:
1246 *      contents of the DCL0_LOW register
1247 */
1248static int f10_early_channel_count(struct amd64_pvt *pvt)
1249{
1250        int dbams[] = { DBAM0, DBAM1 };
1251        int err = 0, channels = 0;
1252        int i, j;
1253        u32 dbam;
1254
1255        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
1256        if (err)
1257                goto err_reg;
1258
1259        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
1260        if (err)
1261                goto err_reg;
1262
1263        /* If we are in 128 bit mode, then we are using 2 channels */
1264        if (pvt->dclr0 & F10_WIDTH_128) {
1265                debugf0("Data WIDTH is 128 bits - 2 channels\n");
1266                channels = 2;
1267                return channels;
1268        }
1269
1270        /*
1271         * Need to check if in UN-ganged mode: In such, there are 2 channels,
1272         * but they are NOT in 128 bit mode and thus the above 'dcl0' status bit
1273         * will be OFF.
1274         *
1275         * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1276         * their CSEnable bit on. If so, then SINGLE DIMM case.
1277         */
1278        debugf0("Data WIDTH is NOT 128 bits - need more decoding\n");
1279
1280        /*
1281         * Check DRAM Bank Address Mapping values for each DIMM to see if there
1282         * is more than just one DIMM present in unganged mode. Need to check
1283         * both controllers since DIMMs can be placed in either one.
1284         */
1285        for (i = 0; i < ARRAY_SIZE(dbams); i++) {
1286                err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam);
1287                if (err)
1288                        goto err_reg;
1289
1290                for (j = 0; j < 4; j++) {
1291                        if (DBAM_DIMM(j, dbam) > 0) {
1292                                channels++;
1293                                break;
1294                        }
1295                }
1296        }
1297
1298        debugf0("MCT channel count: %d\n", channels);
1299
1300        return channels;
1301
1302err_reg:
1303        return -1;
1304
1305}
1306
1307static int f10_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
1308{
1309        return 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
1310}
1311
1312/* Enable extended configuration access via 0xCF8 feature */
1313static void amd64_setup(struct amd64_pvt *pvt)
1314{
1315        u32 reg;
1316
1317        pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
1318
1319        pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG);
1320        reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
1321        pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
1322}
1323
1324/* Restore the extended configuration access via 0xCF8 feature */
1325static void amd64_teardown(struct amd64_pvt *pvt)
1326{
1327        u32 reg;
1328
1329        pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
1330
1331        reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG;
1332        if (pvt->flags.cf8_extcfg)
1333                reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
1334        pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
1335}
1336
1337static u64 f10_get_error_address(struct mem_ctl_info *mci,
1338                        struct err_regs *info)
1339{
1340        return (((u64) (info->nbeah & 0xffff)) << 32) +
1341                        (info->nbeal & ~0x01);
1342}
1343
1344/*
1345 * Read the Base and Limit registers for F10 based Memory controllers. Extract
1346 * fields from the 'raw' reg into separate data fields.
1347 *
1348 * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN.
1349 */
1350static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
1351{
1352        u32 high_offset, low_offset, high_base, low_base, high_limit, low_limit;
1353
1354        low_offset = K8_DRAM_BASE_LOW + (dram << 3);
1355        high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
1356
1357        /* read the 'raw' DRAM BASE Address register */
1358        pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_base);
1359
1360        /* Read from the ECS data register */
1361        pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_base);
1362
1363        /* Extract parts into separate data entries */
1364        pvt->dram_rw_en[dram] = (low_base & 0x3);
1365
1366        if (pvt->dram_rw_en[dram] == 0)
1367                return;
1368
1369        pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
1370
1371        pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
1372                               (((u64)low_base  & 0xFFFF0000) << 8);
1373
1374        low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
1375        high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
1376
1377        /* read the 'raw' LIMIT registers */
1378        pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_limit);
1379
1380        /* Read from the ECS data register for the HIGH portion */
1381        pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_limit);
1382
1383        debugf0("  HW Regs: BASE=0x%08x-%08x      LIMIT=  0x%08x-%08x\n",
1384                high_base, low_base, high_limit, low_limit);
1385
1386        pvt->dram_DstNode[dram] = (low_limit & 0x7);
1387        pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
1388
1389        /*
1390         * Extract address values and form a LIMIT address. Limit is the HIGHEST
1391         * memory location of the region, so low 24 bits need to be all ones.
1392         */
1393        pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
1394                                (((u64) low_limit & 0xFFFF0000) << 8) |
1395                                0x00FFFFFF;
1396}
1397
1398static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
1399{
1400        int err = 0;
1401
1402        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW,
1403                                    &pvt->dram_ctl_select_low);
1404        if (err) {
1405                debugf0("Reading F10_DCTL_SEL_LOW failed\n");
1406        } else {
1407                debugf0("DRAM_DCTL_SEL_LOW=0x%x  DctSelBaseAddr=0x%x\n",
1408                        pvt->dram_ctl_select_low, dct_sel_baseaddr(pvt));
1409
1410                debugf0("  DRAM DCTs are=%s DRAM Is=%s DRAM-Ctl-"
1411                                "sel-hi-range=%s\n",
1412                        (dct_ganging_enabled(pvt) ? "GANGED" : "NOT GANGED"),
1413                        (dct_dram_enabled(pvt) ? "Enabled"   : "Disabled"),
1414                        (dct_high_range_enabled(pvt) ? "Enabled" : "Disabled"));
1415
1416                debugf0("  DctDatIntLv=%s MemCleared=%s DctSelIntLvAddr=0x%x\n",
1417                        (dct_data_intlv_enabled(pvt) ? "Enabled" : "Disabled"),
1418                        (dct_memory_cleared(pvt) ? "True " : "False "),
1419                        dct_sel_interleave_addr(pvt));
1420        }
1421
1422        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH,
1423                                    &pvt->dram_ctl_select_high);
1424        if (err)
1425                debugf0("Reading F10_DCTL_SEL_HIGH failed\n");
1426}
1427
1428/*
1429 * determine channel based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1430 * Interleaving Modes.
1431 */
1432static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1433                                int hi_range_sel, u32 intlv_en)
1434{
1435        u32 cs, temp, dct_sel_high = (pvt->dram_ctl_select_low >> 1) & 1;
1436
1437        if (dct_ganging_enabled(pvt))
1438                cs = 0;
1439        else if (hi_range_sel)
1440                cs = dct_sel_high;
1441        else if (dct_interleave_enabled(pvt)) {
1442                /*
1443                 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1444                 */
1445                if (dct_sel_interleave_addr(pvt) == 0)
1446                        cs = sys_addr >> 6 & 1;
1447                else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
1448                        temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1449
1450                        if (dct_sel_interleave_addr(pvt) & 1)
1451                                cs = (sys_addr >> 9 & 1) ^ temp;
1452                        else
1453                                cs = (sys_addr >> 6 & 1) ^ temp;
1454                } else if (intlv_en & 4)
1455                        cs = sys_addr >> 15 & 1;
1456                else if (intlv_en & 2)
1457                        cs = sys_addr >> 14 & 1;
1458                else if (intlv_en & 1)
1459                        cs = sys_addr >> 13 & 1;
1460                else
1461                        cs = sys_addr >> 12 & 1;
1462        } else if (dct_high_range_enabled(pvt) && !dct_ganging_enabled(pvt))
1463                cs = ~dct_sel_high & 1;
1464        else
1465                cs = 0;
1466
1467        return cs;
1468}
1469
1470static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
1471{
1472        if (intlv_en == 1)
1473                return 1;
1474        else if (intlv_en == 3)
1475                return 2;
1476        else if (intlv_en == 7)
1477                return 3;
1478
1479        return 0;
1480}
1481
1482/* See F10h BKDG, 2.8.10.2 DctSelBaseOffset Programming */
1483static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
1484                                                 u32 dct_sel_base_addr,
1485                                                 u64 dct_sel_base_off,
1486                                                 u32 hole_valid, u32 hole_off,
1487                                                 u64 dram_base)
1488{
1489        u64 chan_off;
1490
1491        if (hi_range_sel) {
1492                if (!(dct_sel_base_addr & 0xFFFFF800) &&
1493                   hole_valid && (sys_addr >= 0x100000000ULL))
1494                        chan_off = hole_off << 16;
1495                else
1496                        chan_off = dct_sel_base_off;
1497        } else {
1498                if (hole_valid && (sys_addr >= 0x100000000ULL))
1499                        chan_off = hole_off << 16;
1500                else
1501                        chan_off = dram_base & 0xFFFFF8000000ULL;
1502        }
1503
1504        return (sys_addr & 0x0000FFFFFFFFFFC0ULL) -
1505                        (chan_off & 0x0000FFFFFF800000ULL);
1506}
1507
1508/* Hack for the time being - Can we get this from BIOS?? */
1509#define CH0SPARE_RANK   0
1510#define CH1SPARE_RANK   1
1511
1512/*
1513 * checks if the csrow passed in is marked as SPARED, if so returns the new
1514 * spare row
1515 */
1516static inline int f10_process_possible_spare(int csrow,
1517                                u32 cs, struct amd64_pvt *pvt)
1518{
1519        u32 swap_done;
1520        u32 bad_dram_cs;
1521
1522        /* Depending on channel, isolate respective SPARING info */
1523        if (cs) {
1524                swap_done = F10_ONLINE_SPARE_SWAPDONE1(pvt->online_spare);
1525                bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS1(pvt->online_spare);
1526                if (swap_done && (csrow == bad_dram_cs))
1527                        csrow = CH1SPARE_RANK;
1528        } else {
1529                swap_done = F10_ONLINE_SPARE_SWAPDONE0(pvt->online_spare);
1530                bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS0(pvt->online_spare);
1531                if (swap_done && (csrow == bad_dram_cs))
1532                        csrow = CH0SPARE_RANK;
1533        }
1534        return csrow;
1535}
1536
1537/*
1538 * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1539 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1540 *
1541 * Return:
1542 *      -EINVAL:  NOT FOUND
1543 *      0..csrow = Chip-Select Row
1544 */
1545static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
1546{
1547        struct mem_ctl_info *mci;
1548        struct amd64_pvt *pvt;
1549        u32 cs_base, cs_mask;
1550        int cs_found = -EINVAL;
1551        int csrow;
1552
1553        mci = mci_lookup[nid];
1554        if (!mci)
1555                return cs_found;
1556
1557        pvt = mci->pvt_info;
1558
1559        debugf1("InputAddr=0x%x  channelselect=%d\n", in_addr, cs);
1560
1561        for (csrow = 0; csrow < pvt->cs_count; csrow++) {
1562
1563                cs_base = amd64_get_dct_base(pvt, cs, csrow);
1564                if (!(cs_base & K8_DCSB_CS_ENABLE))
1565                        continue;
1566
1567                /*
1568                 * We have an ENABLED CSROW, Isolate just the MASK bits of the
1569                 * target: [28:19] and [13:5], which map to [36:27] and [21:13]
1570                 * of the actual address.
1571                 */
1572                cs_base &= REV_F_F1Xh_DCSB_BASE_BITS;
1573
1574                /*
1575                 * Get the DCT Mask, and ENABLE the reserved bits: [18:16] and
1576                 * [4:0] to become ON. Then mask off bits [28:0] ([36:8])
1577                 */
1578                cs_mask = amd64_get_dct_mask(pvt, cs, csrow);
1579
1580                debugf1("    CSROW=%d CSBase=0x%x RAW CSMask=0x%x\n",
1581                                csrow, cs_base, cs_mask);
1582
1583                cs_mask = (cs_mask | 0x0007C01F) & 0x1FFFFFFF;
1584
1585                debugf1("              Final CSMask=0x%x\n", cs_mask);
1586                debugf1("    (InputAddr & ~CSMask)=0x%x "
1587                                "(CSBase & ~CSMask)=0x%x\n",
1588                                (in_addr & ~cs_mask), (cs_base & ~cs_mask));
1589
1590                if ((in_addr & ~cs_mask) == (cs_base & ~cs_mask)) {
1591                        cs_found = f10_process_possible_spare(csrow, cs, pvt);
1592
1593                        debugf1(" MATCH csrow=%d\n", cs_found);
1594                        break;
1595                }
1596        }
1597        return cs_found;
1598}
1599
1600/* For a given @dram_range, check if @sys_addr falls within it. */
1601static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
1602                                  u64 sys_addr, int *nid, int *chan_sel)
1603{
1604        int node_id, cs_found = -EINVAL, high_range = 0;
1605        u32 intlv_en, intlv_sel, intlv_shift, hole_off;
1606        u32 hole_valid, tmp, dct_sel_base, channel;
1607        u64 dram_base, chan_addr, dct_sel_base_off;
1608
1609        dram_base = pvt->dram_base[dram_range];
1610        intlv_en = pvt->dram_IntlvEn[dram_range];
1611
1612        node_id = pvt->dram_DstNode[dram_range];
1613        intlv_sel = pvt->dram_IntlvSel[dram_range];
1614
1615        debugf1("(dram=%d) Base=0x%llx SystemAddr= 0x%llx Limit=0x%llx\n",
1616                dram_range, dram_base, sys_addr, pvt->dram_limit[dram_range]);
1617
1618        /*
1619         * This assumes that one node's DHAR is the same as all the other
1620         * nodes' DHAR.
1621         */
1622        hole_off = (pvt->dhar & 0x0000FF80);
1623        hole_valid = (pvt->dhar & 0x1);
1624        dct_sel_base_off = (pvt->dram_ctl_select_high & 0xFFFFFC00) << 16;
1625
1626        debugf1("   HoleOffset=0x%x  HoleValid=0x%x IntlvSel=0x%x\n",
1627                        hole_off, hole_valid, intlv_sel);
1628
1629        if (intlv_en ||
1630            (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1631                return -EINVAL;
1632
1633        dct_sel_base = dct_sel_baseaddr(pvt);
1634
1635        /*
1636         * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1637         * select between DCT0 and DCT1.
1638         */
1639        if (dct_high_range_enabled(pvt) &&
1640           !dct_ganging_enabled(pvt) &&
1641           ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1642                high_range = 1;
1643
1644        channel = f10_determine_channel(pvt, sys_addr, high_range, intlv_en);
1645
1646        chan_addr = f10_get_base_addr_offset(sys_addr, high_range, dct_sel_base,
1647                                             dct_sel_base_off, hole_valid,
1648                                             hole_off, dram_base);
1649
1650        intlv_shift = f10_map_intlv_en_to_shift(intlv_en);
1651
1652        /* remove Node ID (in case of memory interleaving) */
1653        tmp = chan_addr & 0xFC0;
1654
1655        chan_addr = ((chan_addr >> intlv_shift) & 0xFFFFFFFFF000ULL) | tmp;
1656
1657        /* remove channel interleave and hash */
1658        if (dct_interleave_enabled(pvt) &&
1659           !dct_high_range_enabled(pvt) &&
1660           !dct_ganging_enabled(pvt)) {
1661                if (dct_sel_interleave_addr(pvt) != 1)
1662                        chan_addr = (chan_addr >> 1) & 0xFFFFFFFFFFFFFFC0ULL;
1663                else {
1664                        tmp = chan_addr & 0xFC0;
1665                        chan_addr = ((chan_addr & 0xFFFFFFFFFFFFC000ULL) >> 1)
1666                                        | tmp;
1667                }
1668        }
1669
1670        debugf1("   (ChannelAddrLong=0x%llx) >> 8 becomes InputAddr=0x%x\n",
1671                chan_addr, (u32)(chan_addr >> 8));
1672
1673        cs_found = f10_lookup_addr_in_dct(chan_addr >> 8, node_id, channel);
1674
1675        if (cs_found >= 0) {
1676                *nid = node_id;
1677                *chan_sel = channel;
1678        }
1679        return cs_found;
1680}
1681
1682static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
1683                                       int *node, int *chan_sel)
1684{
1685        int dram_range, cs_found = -EINVAL;
1686        u64 dram_base, dram_limit;
1687
1688        for (dram_range = 0; dram_range < DRAM_REG_COUNT; dram_range++) {
1689
1690                if (!pvt->dram_rw_en[dram_range])
1691                        continue;
1692
1693                dram_base = pvt->dram_base[dram_range];
1694                dram_limit = pvt->dram_limit[dram_range];
1695
1696                if ((dram_base <= sys_addr) && (sys_addr <= dram_limit)) {
1697
1698                        cs_found = f10_match_to_this_node(pvt, dram_range,
1699                                                          sys_addr, node,
1700                                                          chan_sel);
1701                        if (cs_found >= 0)
1702                                break;
1703                }
1704        }
1705        return cs_found;
1706}
1707
1708/*
1709 * This the F10h reference code from AMD to map a @sys_addr to NodeID,
1710 * CSROW, Channel.
1711 *
1712 * The @sys_addr is usually an error address received from the hardware.
1713 */
1714static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1715                                     struct err_regs *info,
1716                                     u64 sys_addr)
1717{
1718        struct amd64_pvt *pvt = mci->pvt_info;
1719        u32 page, offset;
1720        unsigned short syndrome;
1721        int nid, csrow, chan = 0;
1722
1723        csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
1724
1725        if (csrow >= 0) {
1726                error_address_to_page_and_offset(sys_addr, &page, &offset);
1727
1728                syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
1729                syndrome |= LOW_SYNDROME(info->nbsh);
1730
1731                /*
1732                 * Is CHIPKILL on? If so, then we can attempt to use the
1733                 * syndrome to isolate which channel the error was on.
1734                 */
1735                if (pvt->nbcfg & K8_NBCFG_CHIPKILL)
1736                        chan = get_channel_from_ecc_syndrome(syndrome);
1737
1738                if (chan >= 0) {
1739                        edac_mc_handle_ce(mci, page, offset, syndrome,
1740                                        csrow, chan, EDAC_MOD_STR);
1741                } else {
1742                        /*
1743                         * Channel unknown, report all channels on this
1744                         * CSROW as failed.
1745                         */
1746                        for (chan = 0; chan < mci->csrows[csrow].nr_channels;
1747                                                                chan++) {
1748                                        edac_mc_handle_ce(mci, page, offset,
1749                                                        syndrome,
1750                                                        csrow, chan,
1751                                                        EDAC_MOD_STR);
1752                        }
1753                }
1754
1755        } else {
1756                edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1757        }
1758}
1759
1760/*
1761 * Input (@index) is the DBAM DIMM value (1 of 4) used as an index into a shift
1762 * table (revf_quad_ddr2_shift) which starts at 128MB DIMM size. Index of 0
1763 * indicates an empty DIMM slot, as reported by Hardware on empty slots.
1764 *
1765 * Normalize to 128MB by subracting 27 bit shift.
1766 */
1767static int map_dbam_to_csrow_size(int index)
1768{
1769        int mega_bytes = 0;
1770
1771        if (index > 0 && index <= DBAM_MAX_VALUE)
1772                mega_bytes = ((128 << (revf_quad_ddr2_shift[index]-27)));
1773
1774        return mega_bytes;
1775}
1776
1777/*
1778 * debug routine to display the memory sizes of a DIMM (ganged or not) and it
1779 * CSROWs as well
1780 */
1781static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
1782                                         int ganged)
1783{
1784        int dimm, size0, size1;
1785        u32 dbam;
1786        u32 *dcsb;
1787
1788        debugf1("  dbam%d: 0x%8.08x  CSROW is %s\n", ctrl,
1789                        ctrl ? pvt->dbam1 : pvt->dbam0,
1790                        ganged ? "GANGED - dbam1 not used" : "NON-GANGED");
1791
1792        dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
1793        dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0;
1794
1795        /* Dump memory sizes for DIMM and its CSROWs */
1796        for (dimm = 0; dimm < 4; dimm++) {
1797
1798                size0 = 0;
1799                if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE)
1800                        size0 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
1801
1802                size1 = 0;
1803                if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
1804                        size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
1805
1806                debugf1("     CTRL-%d DIMM-%d=%5dMB   CSROW-%d=%5dMB "
1807                                "CSROW-%d=%5dMB\n",
1808                                ctrl,
1809                                dimm,
1810                                size0 + size1,
1811                                dimm * 2,
1812                                size0,
1813                                dimm * 2 + 1,
1814                                size1);
1815        }
1816}
1817
1818/*
1819 * Very early hardware probe on pci_probe thread to determine if this module
1820 * supports the hardware.
1821 *
1822 * Return:
1823 *      0 for OK
1824 *      1 for error
1825 */
1826static int f10_probe_valid_hardware(struct amd64_pvt *pvt)
1827{
1828        int ret = 0;
1829
1830        /*
1831         * If we are on a DDR3 machine, we don't know yet if
1832         * we support that properly at this time
1833         */
1834        if ((pvt->dchr0 & F10_DCHR_Ddr3Mode) ||
1835            (pvt->dchr1 & F10_DCHR_Ddr3Mode)) {
1836
1837                amd64_printk(KERN_WARNING,
1838                        "%s() This machine is running with DDR3 memory. "
1839                        "This is not currently supported. "
1840                        "DCHR0=0x%x DCHR1=0x%x\n",
1841                        __func__, pvt->dchr0, pvt->dchr1);
1842
1843                amd64_printk(KERN_WARNING,
1844                        "   Contact '%s' module MAINTAINER to help add"
1845                        " support.\n",
1846                        EDAC_MOD_STR);
1847
1848                ret = 1;
1849
1850        }
1851        return ret;
1852}
1853
1854/*
1855 * There currently are 3 types type of MC devices for AMD Athlon/Opterons
1856 * (as per PCI DEVICE_IDs):
1857 *
1858 * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI
1859 * DEVICE ID, even though there is differences between the different Revisions
1860 * (CG,D,E,F).
1861 *
1862 * Family F10h and F11h.
1863 *
1864 */
1865static struct amd64_family_type amd64_family_types[] = {
1866        [K8_CPUS] = {
1867                .ctl_name = "RevF",
1868                .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1869                .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC,
1870                .ops = {
1871                        .early_channel_count = k8_early_channel_count,
1872                        .get_error_address = k8_get_error_address,
1873                        .read_dram_base_limit = k8_read_dram_base_limit,
1874                        .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
1875                        .dbam_map_to_pages = k8_dbam_map_to_pages,
1876                }
1877        },
1878        [F10_CPUS] = {
1879                .ctl_name = "Family 10h",
1880                .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1881                .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC,
1882                .ops = {
1883                        .probe_valid_hardware = f10_probe_valid_hardware,
1884                        .early_channel_count = f10_early_channel_count,
1885                        .get_error_address = f10_get_error_address,
1886                        .read_dram_base_limit = f10_read_dram_base_limit,
1887                        .read_dram_ctl_register = f10_read_dram_ctl_register,
1888                        .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
1889                        .dbam_map_to_pages = f10_dbam_map_to_pages,
1890                }
1891        },
1892        [F11_CPUS] = {
1893                .ctl_name = "Family 11h",
1894                .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP,
1895                .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC,
1896                .ops = {
1897                        .probe_valid_hardware = f10_probe_valid_hardware,
1898                        .early_channel_count = f10_early_channel_count,
1899                        .get_error_address = f10_get_error_address,
1900                        .read_dram_base_limit = f10_read_dram_base_limit,
1901                        .read_dram_ctl_register = f10_read_dram_ctl_register,
1902                        .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
1903                        .dbam_map_to_pages = f10_dbam_map_to_pages,
1904                }
1905        },
1906};
1907
1908static struct pci_dev *pci_get_related_function(unsigned int vendor,
1909                                                unsigned int device,
1910                                                struct pci_dev *related)
1911{
1912        struct pci_dev *dev = NULL;
1913
1914        dev = pci_get_device(vendor, device, dev);
1915        while (dev) {
1916                if ((dev->bus->number == related->bus->number) &&
1917                    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1918                        break;
1919                dev = pci_get_device(vendor, device, dev);
1920        }
1921
1922        return dev;
1923}
1924
/*
 * syndrome mapping table for ECC ChipKill devices
 *
 * The comment in each row is the token (nibble) number that is in error.
 * The least significant nibble of the syndrome is the mask for the bits
 * that are in error (need to be toggled) for the particular nibble.
 *
 * Each row contains 16 entries.
 * The first entry (0th) is the channel number for that row of syndromes.
 * The remaining 15 entries are the syndromes for the respective error
 * bit mask index.
 *
 * Index 1 is error mask 0001b, indicating that the rightmost bit is the
 * bit in error.
 * Index 2 is error mask 0010b, indicating that the second bit is damaged.
 * Index 3 is error mask 0011b, indicating that the rightmost 2 bits are
 * damaged.
 * And so on until index 15, mask 1111b, whose entry has the syndrome
 * indicating that all 4 bits are damaged.
 *
 * Within a row every syndrome is the XOR of the row's single-bit syndromes
 * (indices 1, 2, 4 and 8) selected by the bits set in its index, and the low
 * nibble of every syndrome equals its index. Seven entries that broke this
 * rule have been corrected: c/12, d/11, 13/7, 13/11, 1a/1, 1b/5 and 21/12
 * (row/index).
 *
 * A search is performed on this table looking for a given syndrome.
 *
 * See the AMD documentation for ECC syndromes. This ECC table is valid
 * across all the versions of the AMD64 processors.
 *
 * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a
 * COLUMN index, then search all ROWS of that column, looking for a match
 * with the input syndrome. The ROW value will be the token number.
 *
 * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this
 * error.
 */
#define NUMBER_ECC_ROWS  36
static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = {
        /* Channel 0 syndromes */
        {/*0*/  0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
           0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df },
        {/*1*/  0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
           0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f },
        {/*2*/  0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
           0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f },
        {/*3*/  0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
           0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df },
        {/*4*/  0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
           0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f },
        {/*5*/  0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
           0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf },
        {/*6*/  0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
           0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f },
        {/*7*/  0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
           0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff },
        {/*8*/  0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
           0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f },
        {/*9*/  0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
           0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f },
        {/*a*/  0, 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
           0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f },
        {/*b*/  0, 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
           0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f },
        {/*c*/  0, 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
           0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x642c, 0x85fd, 0x164e, 0xf79f },
        {/*d*/  0, 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
           0x20e8, 0x40b9, 0x904a, 0xf01b, 0x307c, 0x502d, 0x80de, 0xe08f },
        {/*e*/  0, 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
           0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f },
        {/*f*/  0, 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
           0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f },

        /* Channel 1 syndromes */
        {/*10*/ 1, 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
           0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f },
        {/*11*/ 1, 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
           0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff },
        {/*12*/ 1, 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
           0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f },
        {/*13*/ 1, 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0x8e97,
           0x1aa8, 0xc7e9, 0x7c2a, 0xa16b, 0x2ffc, 0xf2bd, 0x497e, 0x943f },
        {/*14*/ 1, 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
           0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f },
        {/*15*/ 1, 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
           0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f },
        {/*16*/ 1, 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
           0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f },
        {/*17*/ 1, 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
           0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf },
        {/*18*/ 1, 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
           0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f },
        {/*19*/ 1, 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
           0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f },
        {/*1a*/ 1, 0x9391, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
           0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf },
        {/*1b*/ 1, 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3155, 0xb9c6, 0x7527,
           0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff },
        {/*1c*/ 1, 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
           0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef },
        {/*1d*/ 1, 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
           0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def },
        {/*1e*/ 1, 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
           0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f },
        {/*1f*/ 1, 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
           0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f },

        /* ECC bits are also in the set of tokens and they too can go bad
         * first 2 cover channel 0, while the second 2 cover channel 1
         */
        {/*20*/ 0, 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
           0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f },
        {/*21*/ 0, 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
           0xac08, 0xed09, 0x2e0a, 0x6f0b, 0xf40c, 0xb50d, 0x760e, 0x370f },
        {/*22*/ 1, 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
           0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f },
        {/*23*/ 1, 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
           0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df }
};
2039
2040/*
2041 * Given the syndrome argument, scan each of the channel tables for a syndrome
2042 * match. Depending on which table it is found, return the channel number.
2043 */
2044static int get_channel_from_ecc_syndrome(unsigned short syndrome)
2045{
2046        int row;
2047        int column;
2048
2049        /* Determine column to scan */
2050        column = syndrome & 0xF;
2051
2052        /* Scan all rows, looking for syndrome, or end of table */
2053        for (row = 0; row < NUMBER_ECC_ROWS; row++) {
2054                if (ecc_chipkill_syndromes[row][column] == syndrome)
2055                        return ecc_chipkill_syndromes[row][0];
2056        }
2057
2058        debugf0("syndrome(%x) not found\n", syndrome);
2059        return -1;
2060}
2061
2062/*
2063 * Check for valid error in the NB Status High register. If so, proceed to read
2064 * NB Status Low, NB Address Low and NB Address High registers and store data
2065 * into error structure.
2066 *
2067 * Returns:
2068 *      - 1: if hardware regs contains valid error info
2069 *      - 0: if no valid error is indicated
2070 */
2071static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
2072                                     struct err_regs *regs)
2073{
2074        struct amd64_pvt *pvt;
2075        struct pci_dev *misc_f3_ctl;
2076        int err = 0;
2077
2078        pvt = mci->pvt_info;
2079        misc_f3_ctl = pvt->misc_f3_ctl;
2080
2081        err = pci_read_config_dword(misc_f3_ctl, K8_NBSH, &regs->nbsh);
2082        if (err)
2083                goto err_reg;
2084
2085        if (!(regs->nbsh & K8_NBSH_VALID_BIT))
2086                return 0;
2087
2088        /* valid error, read remaining error information registers */
2089        err = pci_read_config_dword(misc_f3_ctl, K8_NBSL, &regs->nbsl);
2090        if (err)
2091                goto err_reg;
2092
2093        err = pci_read_config_dword(misc_f3_ctl, K8_NBEAL, &regs->nbeal);
2094        if (err)
2095                goto err_reg;
2096
2097        err = pci_read_config_dword(misc_f3_ctl, K8_NBEAH, &regs->nbeah);
2098        if (err)
2099                goto err_reg;
2100
2101        err = pci_read_config_dword(misc_f3_ctl, K8_NBCFG, &regs->nbcfg);
2102        if (err)
2103                goto err_reg;
2104
2105        return 1;
2106
2107err_reg:
2108        debugf0("Reading error info register failed\n");
2109        return 0;
2110}
2111
2112/*
2113 * This function is called to retrieve the error data from hardware and store it
2114 * in the info structure.
2115 *
2116 * Returns:
2117 *      - 1: if a valid error is found
2118 *      - 0: if no error is found
2119 */
2120static int amd64_get_error_info(struct mem_ctl_info *mci,
2121                                struct err_regs *info)
2122{
2123        struct amd64_pvt *pvt;
2124        struct err_regs regs;
2125
2126        pvt = mci->pvt_info;
2127
2128        if (!amd64_get_error_info_regs(mci, info))
2129                return 0;
2130
2131        /*
2132         * Here's the problem with the K8's EDAC reporting: There are four
2133         * registers which report pieces of error information. They are shared
2134         * between CEs and UEs. Furthermore, contrary to what is stated in the
2135         * BKDG, the overflow bit is never used! Every error always updates the
2136         * reporting registers.
2137         *
2138         * Can you see the race condition? All four error reporting registers
2139         * must be read before a new error updates them! There is no way to read
2140         * all four registers atomically. The best than can be done is to detect
2141         * that a race has occured and then report the error without any kind of
2142         * precision.
2143         *
2144         * What is still positive is that errors are still reported and thus
2145         * problems can still be detected - just not localized because the
2146         * syndrome and address are spread out across registers.
2147         *
2148         * Grrrrr!!!!!  Here's hoping that AMD fixes this in some future K8 rev.
2149         * UEs and CEs should have separate register sets with proper overflow
2150         * bits that are used! At very least the problem can be fixed by
2151         * honoring the ErrValid bit in 'nbsh' and not updating registers - just
2152         * set the overflow bit - unless the current error is CE and the new
2153         * error is UE which would be the only situation for overwriting the
2154         * current values.
2155         */
2156
2157        regs = *info;
2158
2159        /* Use info from the second read - most current */
2160        if (unlikely(!amd64_get_error_info_regs(mci, info)))
2161                return 0;
2162
2163        /* clear the error bits in hardware */
2164        pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);
2165
2166        /* Check for the possible race condition */
2167        if ((regs.nbsh != info->nbsh) ||
2168             (regs.nbsl != info->nbsl) ||
2169             (regs.nbeah != info->nbeah) ||
2170             (regs.nbeal != info->nbeal)) {
2171                amd64_mc_printk(mci, KERN_WARNING,
2172                                "hardware STATUS read access race condition "
2173                                "detected!\n");
2174                return 0;
2175        }
2176        return 1;
2177}
2178
2179/*
2180 * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
2181 * ADDRESS and process.
2182 */
2183static void amd64_handle_ce(struct mem_ctl_info *mci,
2184                            struct err_regs *info)
2185{
2186        struct amd64_pvt *pvt = mci->pvt_info;
2187        u64 SystemAddress;
2188
2189        /* Ensure that the Error Address is VALID */
2190        if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
2191                amd64_mc_printk(mci, KERN_ERR,
2192                        "HW has no ERROR_ADDRESS available\n");
2193                edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
2194                return;
2195        }
2196
2197        SystemAddress = extract_error_address(mci, info);
2198
2199        amd64_mc_printk(mci, KERN_ERR,
2200                "CE ERROR_ADDRESS= 0x%llx\n", SystemAddress);
2201
2202        pvt->ops->map_sysaddr_to_csrow(mci, info, SystemAddress);
2203}
2204
2205/* Handle any Un-correctable Errors (UEs) */
2206static void amd64_handle_ue(struct mem_ctl_info *mci,
2207                            struct err_regs *info)
2208{
2209        int csrow;
2210        u64 SystemAddress;
2211        u32 page, offset;
2212        struct mem_ctl_info *log_mci, *src_mci = NULL;
2213
2214        log_mci = mci;
2215
2216        if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
2217                amd64_mc_printk(mci, KERN_CRIT,
2218                        "HW has no ERROR_ADDRESS available\n");
2219                edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
2220                return;
2221        }
2222
2223        SystemAddress = extract_error_address(mci, info);
2224
2225        /*
2226         * Find out which node the error address belongs to. This may be
2227         * different from the node that detected the error.
2228         */
2229        src_mci = find_mc_by_sys_addr(mci, SystemAddress);
2230        if (!src_mci) {
2231                amd64_mc_printk(mci, KERN_CRIT,
2232                        "ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n",
2233                        (unsigned long)SystemAddress);
2234                edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
2235                return;
2236        }
2237
2238        log_mci = src_mci;
2239
2240        csrow = sys_addr_to_csrow(log_mci, SystemAddress);
2241        if (csrow < 0) {
2242                amd64_mc_printk(mci, KERN_CRIT,
2243                        "ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n",
2244                        (unsigned long)SystemAddress);
2245                edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
2246        } else {
2247                error_address_to_page_and_offset(SystemAddress, &page, &offset);
2248                edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
2249        }
2250}
2251
2252static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
2253                                            struct err_regs *info)
2254{
2255        u32 ec  = ERROR_CODE(info->nbsl);
2256        u32 xec = EXT_ERROR_CODE(info->nbsl);
2257        int ecc_type = (info->nbsh >> 13) & 0x3;
2258
2259        /* Bail early out if this was an 'observed' error */
2260        if (PP(ec) == K8_NBSL_PP_OBS)
2261                return;
2262
2263        /* Do only ECC errors */
2264        if (xec && xec != F10_NBSL_EXT_ERR_ECC)
2265                return;
2266
2267        if (ecc_type == 2)
2268                amd64_handle_ce(mci, info);
2269        else if (ecc_type == 1)
2270                amd64_handle_ue(mci, info);
2271
2272        /*
2273         * If main error is CE then overflow must be CE.  If main error is UE
2274         * then overflow is unknown.  We'll call the overflow a CE - if
2275         * panic_on_ue is set then we're already panic'ed and won't arrive
2276         * here. Else, then apparently someone doesn't think that UE's are
2277         * catastrophic.
2278         */
2279        if (info->nbsh & K8_NBSH_OVERFLOW)
2280                edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow");
2281}
2282
2283void amd64_decode_bus_error(int node_id, struct err_regs *regs)
2284{
2285        struct mem_ctl_info *mci = mci_lookup[node_id];
2286
2287        __amd64_decode_bus_error(mci, regs);
2288
2289        /*
2290         * Check the UE bit of the NB status high register, if set generate some
2291         * logs. If NOT a GART error, then process the event as a NO-INFO event.
2292         * If it was a GART error, skip that process.
2293         *
2294         * FIXME: this should go somewhere else, if at all.
2295         */
2296        if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
2297                edac_mc_handle_ue_no_info(mci, "UE bit is set");
2298
2299}
2300
2301/*
2302 * The main polling 'check' function, called FROM the edac core to perform the
2303 * error checking and if an error is encountered, error processing.
2304 */
2305static void amd64_check(struct mem_ctl_info *mci)
2306{
2307        struct err_regs regs;
2308
2309        if (amd64_get_error_info(mci, &regs)) {
2310                struct amd64_pvt *pvt = mci->pvt_info;
2311                amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
2312        }
2313}
2314
2315/*
2316 * Input:
2317 *      1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
2318 *      2) AMD Family index value
2319 *
2320 * Ouput:
2321 *      Upon return of 0, the following filled in:
2322 *
2323 *              struct pvt->addr_f1_ctl
2324 *              struct pvt->misc_f3_ctl
2325 *
2326 *      Filled in with related device funcitions of 'dram_f2_ctl'
2327 *      These devices are "reserved" via the pci_get_device()
2328 *
2329 *      Upon return of 1 (error status):
2330 *
2331 *              Nothing reserved
2332 */
2333static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
2334{
2335        const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
2336
2337        /* Reserve the ADDRESS MAP Device */
2338        pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
2339                                                    amd64_dev->addr_f1_ctl,
2340                                                    pvt->dram_f2_ctl);
2341
2342        if (!pvt->addr_f1_ctl) {
2343                amd64_printk(KERN_ERR, "error address map device not found: "
2344                             "vendor %x device 0x%x (broken BIOS?)\n",
2345                             PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
2346                return 1;
2347        }
2348
2349        /* Reserve the MISC Device */
2350        pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
2351                                                    amd64_dev->misc_f3_ctl,
2352                                                    pvt->dram_f2_ctl);
2353
2354        if (!pvt->misc_f3_ctl) {
2355                pci_dev_put(pvt->addr_f1_ctl);
2356                pvt->addr_f1_ctl = NULL;
2357
2358                amd64_printk(KERN_ERR, "error miscellaneous device not found: "
2359                             "vendor %x device 0x%x (broken BIOS?)\n",
2360                             PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
2361                return 1;
2362        }
2363
2364        debugf1("    Addr Map device PCI Bus ID:\t%s\n",
2365                pci_name(pvt->addr_f1_ctl));
2366        debugf1("    DRAM MEM-CTL PCI Bus ID:\t%s\n",
2367                pci_name(pvt->dram_f2_ctl));
2368        debugf1("    Misc device PCI Bus ID:\t%s\n",
2369                pci_name(pvt->misc_f3_ctl));
2370
2371        return 0;
2372}
2373
2374static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
2375{
2376        pci_dev_put(pvt->addr_f1_ctl);
2377        pci_dev_put(pvt->misc_f3_ctl);
2378}
2379
2380/*
2381 * Retrieve the hardware registers of the memory controller (this includes the
2382 * 'Address Map' and 'Misc' device regs)
2383 */
2384static void amd64_read_mc_registers(struct amd64_pvt *pvt)
2385{
2386        u64 msr_val;
2387        int dram, err = 0;
2388
2389        /*
2390         * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2391         * those are Read-As-Zero
2392         */
2393        rdmsrl(MSR_K8_TOP_MEM1, msr_val);
2394        pvt->top_mem = msr_val >> 23;
2395        debugf0("  TOP_MEM=0x%08llx\n", pvt->top_mem);
2396
2397        /* check first whether TOP_MEM2 is enabled */
2398        rdmsrl(MSR_K8_SYSCFG, msr_val);
2399        if (msr_val & (1U << 21)) {
2400                rdmsrl(MSR_K8_TOP_MEM2, msr_val);
2401                pvt->top_mem2 = msr_val >> 23;
2402                debugf0("  TOP_MEM2=0x%08llx\n", pvt->top_mem2);
2403        } else
2404                debugf0("  TOP_MEM2 disabled.\n");
2405
2406        amd64_cpu_display_info(pvt);
2407
2408        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
2409        if (err)
2410                goto err_reg;
2411
2412        if (pvt->ops->read_dram_ctl_register)
2413                pvt->ops->read_dram_ctl_register(pvt);
2414
2415        for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
2416                /*
2417                 * Call CPU specific READ function to get the DRAM Base and
2418                 * Limit values from the DCT.
2419                 */
2420                pvt->ops->read_dram_base_limit(pvt, dram);
2421
2422                /*
2423                 * Only print out debug info on rows with both R and W Enabled.
2424                 * Normal processing, compiler should optimize this whole 'if'
2425                 * debug output block away.
2426                 */
2427                if (pvt->dram_rw_en[dram] != 0) {
2428                        debugf1("  DRAM_BASE[%d]: 0x%8.08x-%8.08x "
2429                                "DRAM_LIMIT:  0x%8.08x-%8.08x\n",
2430                                dram,
2431                                (u32)(pvt->dram_base[dram] >> 32),
2432                                (u32)(pvt->dram_base[dram] & 0xFFFFFFFF),
2433                                (u32)(pvt->dram_limit[dram] >> 32),
2434                                (u32)(pvt->dram_limit[dram] & 0xFFFFFFFF));
2435                        debugf1("        IntlvEn=%s %s %s "
2436                                "IntlvSel=%d DstNode=%d\n",
2437                                pvt->dram_IntlvEn[dram] ?
2438                                        "Enabled" : "Disabled",
2439                                (pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
2440                                (pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
2441                                pvt->dram_IntlvSel[dram],
2442                                pvt->dram_DstNode[dram]);
2443                }
2444        }
2445
2446        amd64_read_dct_base_mask(pvt);
2447
2448        err = pci_read_config_dword(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
2449        if (err)
2450                goto err_reg;
2451
2452        amd64_read_dbam_reg(pvt);
2453
2454        err = pci_read_config_dword(pvt->misc_f3_ctl,
2455                                F10_ONLINE_SPARE, &pvt->online_spare);
2456        if (err)
2457                goto err_reg;
2458
2459        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
2460        if (err)
2461                goto err_reg;
2462
2463        err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
2464        if (err)
2465                goto err_reg;
2466
2467        if (!dct_ganging_enabled(pvt)) {
2468                err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1,
2469                                                &pvt->dclr1);
2470                if (err)
2471                        goto err_reg;
2472
2473                err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_1,
2474                                                &pvt->dchr1);
2475                if (err)
2476                        goto err_reg;
2477        }
2478
2479        amd64_dump_misc_regs(pvt);
2480
2481        return;
2482
2483err_reg:
2484        debugf0("Reading an MC register failed\n");
2485
2486}
2487
2488/*
2489 * NOTE: CPU Revision Dependent code
2490 *
2491 * Input:
2492 *      @csrow_nr ChipSelect Row Number (0..pvt->cs_count-1)
2493 *      k8 private pointer to -->
2494 *                      DRAM Bank Address mapping register
2495 *                      node_id
2496 *                      DCL register where dual_channel_active is
2497 *
2498 * The DBAM register consists of 4 sets of 4 bits each definitions:
2499 *
2500 * Bits:        CSROWs
2501 * 0-3          CSROWs 0 and 1
2502 * 4-7          CSROWs 2 and 3
2503 * 8-11         CSROWs 4 and 5
2504 * 12-15        CSROWs 6 and 7
2505 *
2506 * Values range from: 0 to 15
2507 * The meaning of the values depends on CPU revision and dual-channel state,
2508 * see relevant BKDG more info.
2509 *
2510 * The memory controller provides for total of only 8 CSROWs in its current
2511 * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2512 * single channel or two (2) DIMMs in dual channel mode.
2513 *
2514 * The following code logic collapses the various tables for CSROW based on CPU
2515 * revision.
2516 *
2517 * Returns:
2518 *      The number of PAGE_SIZE pages on the specified CSROW number it
2519 *      encompasses
2520 *
2521 */
2522static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
2523{
2524        u32 dram_map, nr_pages;
2525
2526        /*
2527         * The math on this doesn't look right on the surface because x/2*4 can
2528         * be simplified to x*2 but this expression makes use of the fact that
2529         * it is integral math where 1/2=0. This intermediate value becomes the
2530         * number of bits to shift the DBAM register to extract the proper CSROW
2531         * field.
2532         */
2533        dram_map = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
2534
2535        nr_pages = pvt->ops->dbam_map_to_pages(pvt, dram_map);
2536
2537        /*
2538         * If dual channel then double the memory size of single channel.
2539         * Channel count is 1 or 2
2540         */
2541        nr_pages <<= (pvt->channel_count - 1);
2542
2543        debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, dram_map);
2544        debugf0("    nr_pages= %u  channel-count = %d\n",
2545                nr_pages, pvt->channel_count);
2546
2547        return nr_pages;
2548}
2549
2550/*
2551 * Initialize the array of csrow attribute instances, based on the values
2552 * from pci config hardware registers.
2553 */
2554static int amd64_init_csrows(struct mem_ctl_info *mci)
2555{
2556        struct csrow_info *csrow;
2557        struct amd64_pvt *pvt;
2558        u64 input_addr_min, input_addr_max, sys_addr;
2559        int i, err = 0, empty = 1;
2560
2561        pvt = mci->pvt_info;
2562
2563        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
2564        if (err)
2565                debugf0("Reading K8_NBCFG failed\n");
2566
2567        debugf0("NBCFG= 0x%x  CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
2568                (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2569                (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
2570                );
2571
2572        for (i = 0; i < pvt->cs_count; i++) {
2573                csrow = &mci->csrows[i];
2574
2575                if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
2576                        debugf1("----CSROW %d EMPTY for node %d\n", i,
2577                                pvt->mc_node_id);
2578                        continue;
2579                }
2580
2581                debugf1("----CSROW %d VALID for MC node %d\n",
2582                        i, pvt->mc_node_id);
2583
2584                empty = 0;
2585                csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
2586                find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
2587                sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
2588                csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
2589                sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
2590                csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
2591                csrow->page_mask = ~mask_from_dct_mask(pvt, i);
2592                /* 8 bytes of resolution */
2593
2594                csrow->mtype = amd64_determine_memory_type(pvt);
2595
2596                debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
2597                debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
2598                        (unsigned long)input_addr_min,
2599                        (unsigned long)input_addr_max);
2600                debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
2601                        (unsigned long)sys_addr, csrow->page_mask);
2602                debugf1("    nr_pages: %u  first_page: 0x%lx "
2603                        "last_page: 0x%lx\n",
2604                        (unsigned)csrow->nr_pages,
2605                        csrow->first_page, csrow->last_page);
2606
2607                /*
2608                 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2609                 */
2610                if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
2611                        csrow->edac_mode =
2612                            (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
2613                            EDAC_S4ECD4ED : EDAC_SECDED;
2614                else
2615                        csrow->edac_mode = EDAC_NONE;
2616        }
2617
2618        return empty;
2619}
2620
2621/*
2622 * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
2623 * enable it.
2624 */
2625static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
2626{
2627        struct amd64_pvt *pvt = mci->pvt_info;
2628        const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
2629        int cpu, idx = 0, err = 0;
2630        struct msr msrs[cpumask_weight(cpumask)];
2631        u32 value;
2632        u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2633
2634        if (!ecc_enable_override)
2635                return;
2636
2637        memset(msrs, 0, sizeof(msrs));
2638
2639        amd64_printk(KERN_WARNING,
2640                "'ecc_enable_override' parameter is active, "
2641                "Enabling AMD ECC hardware now: CAUTION\n");
2642
2643        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
2644        if (err)
2645                debugf0("Reading K8_NBCTL failed\n");
2646
2647        /* turn on UECCn and CECCEn bits */
2648        pvt->old_nbctl = value & mask;
2649        pvt->nbctl_mcgctl_saved = 1;
2650
2651        value |= mask;
2652        pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
2653
2654        rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2655
2656        for_each_cpu(cpu, cpumask) {
2657                if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
2658                        set_bit(idx, &pvt->old_mcgctl);
2659
2660                msrs[idx].l |= K8_MSR_MCGCTL_NBE;
2661                idx++;
2662        }
2663        wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2664
2665        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2666        if (err)
2667                debugf0("Reading K8_NBCFG failed\n");
2668
2669        debugf0("NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
2670                (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2671                (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
2672
2673        if (!(value & K8_NBCFG_ECC_ENABLE)) {
2674                amd64_printk(KERN_WARNING,
2675                        "This node reports that DRAM ECC is "
2676                        "currently Disabled; ENABLING now\n");
2677
2678                /* Attempt to turn on DRAM ECC Enable */
2679                value |= K8_NBCFG_ECC_ENABLE;
2680                pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
2681
2682                err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2683                if (err)
2684                        debugf0("Reading K8_NBCFG failed\n");
2685
2686                if (!(value & K8_NBCFG_ECC_ENABLE)) {
2687                        amd64_printk(KERN_WARNING,
2688                                "Hardware rejects Enabling DRAM ECC checking\n"
2689                                "Check memory DIMM configuration\n");
2690                } else {
2691                        amd64_printk(KERN_DEBUG,
2692                                "Hardware accepted DRAM ECC Enable\n");
2693                }
2694        }
2695        debugf0("NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
2696                (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2697                (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
2698
2699        pvt->ctl_error_info.nbcfg = value;
2700}
2701
2702static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
2703{
2704        const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
2705        int cpu, idx = 0, err = 0;
2706        struct msr msrs[cpumask_weight(cpumask)];
2707        u32 value;
2708        u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2709
2710        if (!pvt->nbctl_mcgctl_saved)
2711                return;
2712
2713        memset(msrs, 0, sizeof(msrs));
2714
2715        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
2716        if (err)
2717                debugf0("Reading K8_NBCTL failed\n");
2718        value &= ~mask;
2719        value |= pvt->old_nbctl;
2720
2721        /* restore the NB Enable MCGCTL bit */
2722        pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
2723
2724        rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2725
2726        for_each_cpu(cpu, cpumask) {
2727                msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
2728                msrs[idx].l |=
2729                        test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
2730                idx++;
2731        }
2732
2733        wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2734}
2735
2736/* get all cores on this DCT */
2737static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
2738{
2739        int cpu;
2740
2741        for_each_online_cpu(cpu)
2742                if (amd_get_nb_id(cpu) == nid)
2743                        cpumask_set_cpu(cpu, mask);
2744}
2745
2746/* check MCG_CTL on all the cpus on this node */
2747static bool amd64_nb_mce_bank_enabled_on_node(int nid)
2748{
2749        cpumask_t mask;
2750        struct msr *msrs;
2751        int cpu, nbe, idx = 0;
2752        bool ret = false;
2753
2754        cpumask_clear(&mask);
2755
2756        get_cpus_on_this_dct_cpumask(&mask, nid);
2757
2758        msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
2759        if (!msrs) {
2760                amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2761                              __func__);
2762                 return false;
2763        }
2764
2765        rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
2766
2767        for_each_cpu(cpu, &mask) {
2768                nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
2769
2770                debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2771                        cpu, msrs[idx].q,
2772                        (nbe ? "enabled" : "disabled"));
2773
2774                if (!nbe)
2775                        goto out;
2776
2777                idx++;
2778        }
2779        ret = true;
2780
2781out:
2782        kfree(msrs);
2783        return ret;
2784}
2785
2786/*
2787 * EDAC requires that the BIOS have ECC enabled before taking over the
2788 * processing of ECC errors. This is because the BIOS can properly initialize
2789 * the memory system completely. A command line option allows to force-enable
2790 * hardware ECC later in amd64_enable_ecc_error_reporting().
2791 */
2792static const char *ecc_warning =
2793        "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
2794        " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
2795        " Also, use of the override can cause unknown side effects.\n";
2796
2797static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
2798{
2799        u32 value;
2800        int err = 0;
2801        u8 ecc_enabled = 0;
2802        bool nb_mce_en = false;
2803
2804        err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2805        if (err)
2806                debugf0("Reading K8_NBCTL failed\n");
2807
2808        ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
2809        if (!ecc_enabled)
2810                amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
2811                             "is currently disabled, set F3x%x[22] (%s).\n",
2812                             K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2813        else
2814                amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
2815
2816        nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
2817        if (!nb_mce_en)
2818                amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
2819                             "0x%08x[4] on node %d to enable.\n",
2820                             MSR_IA32_MCG_CTL, pvt->mc_node_id);
2821
2822        if (!ecc_enabled || !nb_mce_en) {
2823                if (!ecc_enable_override) {
2824                        amd64_printk(KERN_WARNING, "%s", ecc_warning);
2825                        return -ENODEV;
2826                }
2827        } else
2828                /* CLEAR the override, since BIOS controlled it */
2829                ecc_enable_override = 0;
2830
2831        return 0;
2832}
2833
2834struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
2835                                          ARRAY_SIZE(amd64_inj_attrs) +
2836                                          1];
2837
2838struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
2839
2840static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
2841{
2842        unsigned int i = 0, j = 0;
2843
2844        for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
2845                sysfs_attrs[i] = amd64_dbg_attrs[i];
2846
2847        for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
2848                sysfs_attrs[i] = amd64_inj_attrs[j];
2849
2850        sysfs_attrs[i] = terminator;
2851
2852        mci->mc_driver_sysfs_attributes = sysfs_attrs;
2853}
2854
2855static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
2856{
2857        struct amd64_pvt *pvt = mci->pvt_info;
2858
2859        mci->mtype_cap          = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2860        mci->edac_ctl_cap       = EDAC_FLAG_NONE;
2861
2862        if (pvt->nbcap & K8_NBCAP_SECDED)
2863                mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2864
2865        if (pvt->nbcap & K8_NBCAP_CHIPKILL)
2866                mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2867
2868        mci->edac_cap           = amd64_determine_edac_cap(pvt);
2869        mci->mod_name           = EDAC_MOD_STR;
2870        mci->mod_ver            = EDAC_AMD64_VERSION;
2871        mci->ctl_name           = get_amd_family_name(pvt->mc_type_index);
2872        mci->dev_name           = pci_name(pvt->dram_f2_ctl);
2873        mci->ctl_page_to_phys   = NULL;
2874
2875        /* IMPORTANT: Set the polling 'check' function in this module */
2876        mci->edac_check         = amd64_check;
2877
2878        /* memory scrubber interface */
2879        mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
2880        mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
2881}
2882
2883/*
2884 * Init stuff for this DRAM Controller device.
2885 *
2886 * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
2887 * Space feature MUST be enabled on ALL Processors prior to actually reading
2888 * from the ECS registers. Since the loading of the module can occur on any
2889 * 'core', and cores don't 'see' all the other processors ECS data when the
2890 * others are NOT enabled. Our solution is to first enable ECS access in this
2891 * routine on all processors, gather some data in a amd64_pvt structure and
2892 * later come back in a finish-setup function to perform that final
2893 * initialization. See also amd64_init_2nd_stage() for that.
2894 */
2895static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
2896                                    int mc_type_index)
2897{
2898        struct amd64_pvt *pvt = NULL;
2899        int err = 0, ret;
2900
2901        ret = -ENOMEM;
2902        pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2903        if (!pvt)
2904                goto err_exit;
2905
2906        pvt->mc_node_id = get_node_id(dram_f2_ctl);
2907
2908        pvt->dram_f2_ctl        = dram_f2_ctl;
2909        pvt->ext_model          = boot_cpu_data.x86_model >> 4;
2910        pvt->mc_type_index      = mc_type_index;
2911        pvt->ops                = family_ops(mc_type_index);
2912        pvt->old_mcgctl         = 0;
2913
2914        /*
2915         * We have the dram_f2_ctl device as an argument, now go reserve its
2916         * sibling devices from the PCI system.
2917         */
2918        ret = -ENODEV;
2919        err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
2920        if (err)
2921                goto err_free;
2922
2923        ret = -EINVAL;
2924        err = amd64_check_ecc_enabled(pvt);
2925        if (err)
2926                goto err_put;
2927
2928        /*
2929         * Key operation here: setup of HW prior to performing ops on it. Some
2930         * setup is required to access ECS data. After this is performed, the
2931         * 'teardown' function must be called upon error and normal exit paths.
2932         */
2933        if (boot_cpu_data.x86 >= 0x10)
2934                amd64_setup(pvt);
2935
2936        /*
2937         * Save the pointer to the private data for use in 2nd initialization
2938         * stage
2939         */
2940        pvt_lookup[pvt->mc_node_id] = pvt;
2941
2942        return 0;
2943
2944err_put:
2945        amd64_free_mc_sibling_devices(pvt);
2946
2947err_free:
2948        kfree(pvt);
2949
2950err_exit:
2951        return ret;
2952}
2953
2954/*
2955 * This is the finishing stage of the init code. Needs to be performed after all
2956 * MCs' hardware have been prepped for accessing extended config space.
2957 */
2958static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
2959{
2960        int node_id = pvt->mc_node_id;
2961        struct mem_ctl_info *mci;
2962        int ret, err = 0;
2963
2964        amd64_read_mc_registers(pvt);
2965
2966        ret = -ENODEV;
2967        if (pvt->ops->probe_valid_hardware) {
2968                err = pvt->ops->probe_valid_hardware(pvt);
2969                if (err)
2970                        goto err_exit;
2971        }
2972
2973        /*
2974         * We need to determine how many memory channels there are. Then use
2975         * that information for calculating the size of the dynamic instance
2976         * tables in the 'mci' structure
2977         */
2978        pvt->channel_count = pvt->ops->early_channel_count(pvt);
2979        if (pvt->channel_count < 0)
2980                goto err_exit;
2981
2982        ret = -ENOMEM;
2983        mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id);
2984        if (!mci)
2985                goto err_exit;
2986
2987        mci->pvt_info = pvt;
2988
2989        mci->dev = &pvt->dram_f2_ctl->dev;
2990        amd64_setup_mci_misc_attributes(mci);
2991
2992        if (amd64_init_csrows(mci))
2993                mci->edac_cap = EDAC_FLAG_NONE;
2994
2995        amd64_enable_ecc_error_reporting(mci);
2996        amd64_set_mc_sysfs_attributes(mci);
2997
2998        ret = -ENODEV;
2999        if (edac_mc_add_mc(mci)) {
3000                debugf1("failed edac_mc_add_mc()\n");
3001                goto err_add_mc;
3002        }
3003
3004        mci_lookup[node_id] = mci;
3005        pvt_lookup[node_id] = NULL;
3006
3007        /* register stuff with EDAC MCE */
3008        if (report_gart_errors)
3009                amd_report_gart_errors(true);
3010
3011        amd_register_ecc_decoder(amd64_decode_bus_error);
3012
3013        return 0;
3014
3015err_add_mc:
3016        edac_mc_free(mci);
3017
3018err_exit:
3019        debugf0("failure to init 2nd stage: ret=%d\n", ret);
3020
3021        amd64_restore_ecc_error_reporting(pvt);
3022
3023        if (boot_cpu_data.x86 > 0xf)
3024                amd64_teardown(pvt);
3025
3026        amd64_free_mc_sibling_devices(pvt);
3027
3028        kfree(pvt_lookup[pvt->mc_node_id]);
3029        pvt_lookup[node_id] = NULL;
3030
3031        return ret;
3032}
3033
3034
3035static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
3036                                 const struct pci_device_id *mc_type)
3037{
3038        int ret = 0;
3039
3040        debugf0("(MC node=%d,mc_type='%s')\n", get_node_id(pdev),
3041                get_amd_family_name(mc_type->driver_data));
3042
3043        ret = pci_enable_device(pdev);
3044        if (ret < 0)
3045                ret = -EIO;
3046        else
3047                ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
3048
3049        if (ret < 0)
3050                debugf0("ret=%d\n", ret);
3051
3052        return ret;
3053}
3054
3055static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
3056{
3057        struct mem_ctl_info *mci;
3058        struct amd64_pvt *pvt;
3059
3060        /* Remove from EDAC CORE tracking list */
3061        mci = edac_mc_del_mc(&pdev->dev);
3062        if (!mci)
3063                return;
3064
3065        pvt = mci->pvt_info;
3066
3067        amd64_restore_ecc_error_reporting(pvt);
3068
3069        if (boot_cpu_data.x86 > 0xf)
3070                amd64_teardown(pvt);
3071
3072        amd64_free_mc_sibling_devices(pvt);
3073
3074        kfree(pvt);
3075        mci->pvt_info = NULL;
3076
3077        mci_lookup[pvt->mc_node_id] = NULL;
3078
3079        /* unregister from EDAC MCE */
3080        amd_report_gart_errors(false);
3081        amd_unregister_ecc_decoder(amd64_decode_bus_error);
3082
3083        /* Free the EDAC CORE resources */
3084        edac_mc_free(mci);
3085}
3086
3087/*
3088 * This table is part of the interface for loading drivers for PCI devices. The
3089 * PCI core identifies what devices are on a system during boot, and then
3090 * inquiry this table to see if this driver is for a given device found.
3091 */
3092static const struct pci_device_id amd64_pci_table[] __devinitdata = {
3093        {
3094                .vendor         = PCI_VENDOR_ID_AMD,
3095                .device         = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
3096                .subvendor      = PCI_ANY_ID,
3097                .subdevice      = PCI_ANY_ID,
3098                .class          = 0,
3099                .class_mask     = 0,
3100                .driver_data    = K8_CPUS
3101        },
3102        {
3103                .vendor         = PCI_VENDOR_ID_AMD,
3104                .device         = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
3105                .subvendor      = PCI_ANY_ID,
3106                .subdevice      = PCI_ANY_ID,
3107                .class          = 0,
3108                .class_mask     = 0,
3109                .driver_data    = F10_CPUS
3110        },
3111        {
3112                .vendor         = PCI_VENDOR_ID_AMD,
3113                .device         = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
3114                .subvendor      = PCI_ANY_ID,
3115                .subdevice      = PCI_ANY_ID,
3116                .class          = 0,
3117                .class_mask     = 0,
3118                .driver_data    = F11_CPUS
3119        },
3120        {0, }
3121};
3122MODULE_DEVICE_TABLE(pci, amd64_pci_table);
3123
3124static struct pci_driver amd64_pci_driver = {
3125        .name           = EDAC_MOD_STR,
3126        .probe          = amd64_init_one_instance,
3127        .remove         = __devexit_p(amd64_remove_one_instance),
3128        .id_table       = amd64_pci_table,
3129};
3130
3131static void amd64_setup_pci_device(void)
3132{
3133        struct mem_ctl_info *mci;
3134        struct amd64_pvt *pvt;
3135
3136        if (amd64_ctl_pci)
3137                return;
3138
3139        mci = mci_lookup[0];
3140        if (mci) {
3141
3142                pvt = mci->pvt_info;
3143                amd64_ctl_pci =
3144                        edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
3145                                                    EDAC_MOD_STR);
3146
3147                if (!amd64_ctl_pci) {
3148                        pr_warning("%s(): Unable to create PCI control\n",
3149                                   __func__);
3150
3151                        pr_warning("%s(): PCI error report via EDAC not set\n",
3152                                   __func__);
3153                        }
3154        }
3155}
3156
3157static int __init amd64_edac_init(void)
3158{
3159        int nb, err = -ENODEV;
3160
3161        edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
3162
3163        opstate_init();
3164
3165        if (cache_k8_northbridges() < 0)
3166                return err;
3167
3168        err = pci_register_driver(&amd64_pci_driver);
3169        if (err)
3170                return err;
3171
3172        /*
3173         * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
3174         * amd64_pvt structs. These will be used in the 2nd stage init function
3175         * to finish initialization of the MC instances.
3176         */
3177        for (nb = 0; nb < num_k8_northbridges; nb++) {
3178                if (!pvt_lookup[nb])
3179                        continue;
3180
3181                err = amd64_init_2nd_stage(pvt_lookup[nb]);
3182                if (err)
3183                        goto err_2nd_stage;
3184        }
3185
3186        amd64_setup_pci_device();
3187
3188        return 0;
3189
3190err_2nd_stage:
3191        debugf0("2nd stage failed\n");
3192        pci_unregister_driver(&amd64_pci_driver);
3193
3194        return err;
3195}
3196
3197static void __exit amd64_edac_exit(void)
3198{
3199        if (amd64_ctl_pci)
3200                edac_pci_release_generic_ctl(amd64_ctl_pci);
3201
3202        pci_unregister_driver(&amd64_pci_driver);
3203}
3204
3205module_init(amd64_edac_init);
3206module_exit(amd64_edac_exit);
3207
3208MODULE_LICENSE("GPL");
3209MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3210                "Dave Peterson, Thayne Harbaugh");
3211MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3212                EDAC_AMD64_VERSION);
3213
3214module_param(edac_op_state, int, 0444);
3215MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
3216
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.