linux/drivers/edac/i82443bxgx_edac.c
<<
>>
Prefs
   1/*
   2 * Intel 82443BX/GX (440BX/GX chipset) Memory Controller EDAC kernel
   3 * module (C) 2006 Tim Small
   4 *
   5 * This file may be distributed under the terms of the GNU General
   6 * Public License.
   7 *
   8 * Written by Tim Small <tim@buttersideup.com>, based on work by Linux
   9 * Networx, Thayne Harbaugh, Dan Hollis <goemon at anime dot net> and
  10 * others.
  11 *
  12 * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
  13 *
  14 * Written with reference to 82443BX Host Bridge Datasheet:
  15 * http://www.intel.com/design/chipsets/440/documentation.htm
  16 * references to this document given in [].
  17 *
  18 * This module doesn't support the 440LX, but it may be possible to
  19 * make it do so (the 440LX's register definitions are different, but
  20 * not completely so - I haven't studied them in enough detail to know
  21 * how easy this would be).
  22 */
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26
  27#include <linux/pci.h>
  28#include <linux/pci_ids.h>
  29
  30#include <linux/slab.h>
  31
  32#include <linux/edac.h>
  33#include "edac_core.h"
  34
  35#define I82443_REVISION "0.1"
  36
  37#define EDAC_MOD_STR    "i82443bxgx_edac"
  38
  39/* The 82443BX supports SDRAM, or EDO (EDO for mobile only), "Memory
  40 * Size: 8 MB to 512 MB (1GB with Registered DIMMs) with eight memory
  41 * rows" "The 82443BX supports multiple-bit error detection and
  42 * single-bit error correction when ECC mode is enabled and
  43 * single/multi-bit error detection when correction is disabled.
  44 * During writes to the DRAM, the 82443BX generates ECC for the data
  45 * on a QWord basis. Partial QWord writes require a read-modify-write
  46 * cycle when ECC is enabled."
  47*/
  48
  49/* "Additionally, the 82443BX ensures that the data is corrected in
  50 * main memory so that accumulation of errors is prevented. Another
  51 * error within the same QWord would result in a double-bit error
  52 * which is unrecoverable. This is known as hardware scrubbing since
  53 * it requires no software intervention to correct the data in memory."
  54 */
  55
  56/* [Also see page 100 (section 4.3), "DRAM Interface"]
  57 * [Also see page 112 (section 4.6.1.4), ECC]
  58 */
  59
  60#define I82443BXGX_NR_CSROWS 8
  61#define I82443BXGX_NR_CHANS  1
  62#define I82443BXGX_NR_DIMMS  4
  63
  64/* 82443 PCI Device 0 */
  65#define I82443BXGX_NBXCFG 0x50  /* 32bit register starting at this PCI
  66                                 * config space offset */
  67#define I82443BXGX_NBXCFG_OFFSET_NON_ECCROW 24  /* Array of bits, zero if
  68                                                 * row is non-ECC */
  69#define I82443BXGX_NBXCFG_OFFSET_DRAM_FREQ 12   /* 2 bits,00=100MHz,10=66 MHz */
  70
  71#define I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY 7       /* 2 bits:       */
  72#define I82443BXGX_NBXCFG_INTEGRITY_NONE   0x0  /* 00 = Non-ECC */
  73#define I82443BXGX_NBXCFG_INTEGRITY_EC     0x1  /* 01 = EC (only) */
  74#define I82443BXGX_NBXCFG_INTEGRITY_ECC    0x2  /* 10 = ECC */
  75#define I82443BXGX_NBXCFG_INTEGRITY_SCRUB  0x3  /* 11 = ECC + HW Scrub */
  76
  77#define I82443BXGX_NBXCFG_OFFSET_ECC_DIAG_ENABLE  6
  78
  79/* 82443 PCI Device 0 */
  80#define I82443BXGX_EAP   0x80   /* 32bit register starting at this PCI
  81                                 * config space offset, Error Address
  82                                 * Pointer Register */
  83#define I82443BXGX_EAP_OFFSET_EAP  12   /* High 20 bits of error address */
  84#define I82443BXGX_EAP_OFFSET_MBE  BIT(1)       /* Err at EAP was multi-bit (W1TC) */
  85#define I82443BXGX_EAP_OFFSET_SBE  BIT(0)       /* Err at EAP was single-bit (W1TC) */
  86
  87#define I82443BXGX_ERRCMD  0x90 /* 8bit register starting at this PCI
  88                                 * config space offset. */
  89#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_MBE BIT(1)     /* 1 = enable */
  90#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_SBE BIT(0)     /* 1 = enable */
  91
  92#define I82443BXGX_ERRSTS  0x91 /* 16bit register starting at this PCI
  93                                 * config space offset. */
  94#define I82443BXGX_ERRSTS_OFFSET_MBFRE 5        /* 3 bits - first err row multibit */
  95#define I82443BXGX_ERRSTS_OFFSET_MEF   BIT(4)   /* 1 = MBE occurred */
  96#define I82443BXGX_ERRSTS_OFFSET_SBFRE 1        /* 3 bits - first err row singlebit */
  97#define I82443BXGX_ERRSTS_OFFSET_SEF   BIT(0)   /* 1 = SBE occurred */
  98
  99#define I82443BXGX_DRAMC 0x57   /* 8bit register starting at this PCI
 100                                 * config space offset. */
 101#define I82443BXGX_DRAMC_OFFSET_DT 3    /* 2 bits, DRAM Type */
 102#define I82443BXGX_DRAMC_DRAM_IS_EDO 0  /* 00 = EDO */
 103#define I82443BXGX_DRAMC_DRAM_IS_SDRAM 1        /* 01 = SDRAM */
 104#define I82443BXGX_DRAMC_DRAM_IS_RSDRAM 2       /* 10 = Registered SDRAM */
 105
 106#define I82443BXGX_DRB 0x60     /* 8x 8bit registers starting at this PCI
 107                                 * config space offset. */
 108
 109/* FIXME - don't poll when ECC disabled? */
 110
 111struct i82443bxgx_edacmc_error_info {
 112        u32 eap;
 113};
 114
 115static struct edac_pci_ctl_info *i82443bxgx_pci;
 116
 117static struct pci_dev *mci_pdev;        /* init dev: in case that AGP code has
 118                                         * already registered driver
 119                                         */
 120
 121static int i82443bxgx_registered = 1;
 122
 123static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci,
 124                                struct i82443bxgx_edacmc_error_info
 125                                *info)
 126{
 127        struct pci_dev *pdev;
 128        pdev = to_pci_dev(mci->dev);
 129        pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap);
 130        if (info->eap & I82443BXGX_EAP_OFFSET_SBE)
 131                /* Clear error to allow next error to be reported [p.61] */
 132                pci_write_bits32(pdev, I82443BXGX_EAP,
 133                                 I82443BXGX_EAP_OFFSET_SBE,
 134                                 I82443BXGX_EAP_OFFSET_SBE);
 135
 136        if (info->eap & I82443BXGX_EAP_OFFSET_MBE)
 137                /* Clear error to allow next error to be reported [p.61] */
 138                pci_write_bits32(pdev, I82443BXGX_EAP,
 139                                 I82443BXGX_EAP_OFFSET_MBE,
 140                                 I82443BXGX_EAP_OFFSET_MBE);
 141}
 142
 143static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci,
 144                                                struct
 145                                                i82443bxgx_edacmc_error_info
 146                                                *info, int handle_errors)
 147{
 148        int error_found = 0;
 149        u32 eapaddr, page, pageoffset;
 150
 151        /* bits 30:12 hold the 4kb block in which the error occurred
 152         * [p.61] */
 153        eapaddr = (info->eap & 0xfffff000);
 154        page = eapaddr >> PAGE_SHIFT;
 155        pageoffset = eapaddr - (page << PAGE_SHIFT);
 156
 157        if (info->eap & I82443BXGX_EAP_OFFSET_SBE) {
 158                error_found = 1;
 159                if (handle_errors)
 160                        edac_mc_handle_ce(mci, page, pageoffset,
 161                                /* 440BX/GX don't make syndrome information
 162                                 * available */
 163                                0, edac_mc_find_csrow_by_page(mci, page), 0,
 164                                mci->ctl_name);
 165        }
 166
 167        if (info->eap & I82443BXGX_EAP_OFFSET_MBE) {
 168                error_found = 1;
 169                if (handle_errors)
 170                        edac_mc_handle_ue(mci, page, pageoffset,
 171                                        edac_mc_find_csrow_by_page(mci, page),
 172                                        mci->ctl_name);
 173        }
 174
 175        return error_found;
 176}
 177
 178static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci)
 179{
 180        struct i82443bxgx_edacmc_error_info info;
 181
 182        debugf1("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
 183        i82443bxgx_edacmc_get_error_info(mci, &info);
 184        i82443bxgx_edacmc_process_error_info(mci, &info, 1);
 185}
 186
 187static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
 188                                struct pci_dev *pdev,
 189                                enum edac_type edac_mode,
 190                                enum mem_type mtype)
 191{
 192        struct csrow_info *csrow;
 193        int index;
 194        u8 drbar, dramc;
 195        u32 row_base, row_high_limit, row_high_limit_last;
 196
 197        pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
 198        row_high_limit_last = 0;
 199        for (index = 0; index < mci->nr_csrows; index++) {
 200                csrow = &mci->csrows[index];
 201                pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
 202                debugf1("MC%d: " __FILE__ ": %s() Row=%d DRB = %#0x\n",
 203                        mci->mc_idx, __func__, index, drbar);
 204                row_high_limit = ((u32) drbar << 23);
 205                /* find the DRAM Chip Select Base address and mask */
 206                debugf1("MC%d: " __FILE__ ": %s() Row=%d, "
 207                        "Boundry Address=%#0x, Last = %#0x \n",
 208                        mci->mc_idx, __func__, index, row_high_limit,
 209                        row_high_limit_last);
 210
 211                /* 440GX goes to 2GB, represented with a DRB of 0. */
 212                if (row_high_limit_last && !row_high_limit)
 213                        row_high_limit = 1UL << 31;
 214
 215                /* This row is empty [p.49] */
 216                if (row_high_limit == row_high_limit_last)
 217                        continue;
 218                row_base = row_high_limit_last;
 219                csrow->first_page = row_base >> PAGE_SHIFT;
 220                csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
 221                csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
 222                /* EAP reports in 4kilobyte granularity [61] */
 223                csrow->grain = 1 << 12;
 224                csrow->mtype = mtype;
 225                /* I don't think 440BX can tell you device type? FIXME? */
 226                csrow->dtype = DEV_UNKNOWN;
 227                /* Mode is global to all rows on 440BX */
 228                csrow->edac_mode = edac_mode;
 229                row_high_limit_last = row_high_limit;
 230        }
 231}
 232
 233static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
 234{
 235        struct mem_ctl_info *mci;
 236        u8 dramc;
 237        u32 nbxcfg, ecc_mode;
 238        enum mem_type mtype;
 239        enum edac_type edac_mode;
 240
 241        debugf0("MC: " __FILE__ ": %s()\n", __func__);
 242
 243        /* Something is really hosed if PCI config space reads from
 244         * the MC aren't working.
 245         */
 246        if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg))
 247                return -EIO;
 248
 249        mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0);
 250
 251        if (mci == NULL)
 252                return -ENOMEM;
 253
 254        debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
 255        mci->dev = &pdev->dev;
 256        mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR;
 257        mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
 258        pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
 259        switch ((dramc >> I82443BXGX_DRAMC_OFFSET_DT) & (BIT(0) | BIT(1))) {
 260        case I82443BXGX_DRAMC_DRAM_IS_EDO:
 261                mtype = MEM_EDO;
 262                break;
 263        case I82443BXGX_DRAMC_DRAM_IS_SDRAM:
 264                mtype = MEM_SDR;
 265                break;
 266        case I82443BXGX_DRAMC_DRAM_IS_RSDRAM:
 267                mtype = MEM_RDR;
 268                break;
 269        default:
 270                debugf0("Unknown/reserved DRAM type value "
 271                        "in DRAMC register!\n");
 272                mtype = -MEM_UNKNOWN;
 273        }
 274
 275        if ((mtype == MEM_SDR) || (mtype == MEM_RDR))
 276                mci->edac_cap = mci->edac_ctl_cap;
 277        else
 278                mci->edac_cap = EDAC_FLAG_NONE;
 279
 280        mci->scrub_cap = SCRUB_FLAG_HW_SRC;
 281        pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg);
 282        ecc_mode = ((nbxcfg >> I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY) &
 283                (BIT(0) | BIT(1)));
 284
 285        mci->scrub_mode = (ecc_mode == I82443BXGX_NBXCFG_INTEGRITY_SCRUB)
 286                ? SCRUB_HW_SRC : SCRUB_NONE;
 287
 288        switch (ecc_mode) {
 289        case I82443BXGX_NBXCFG_INTEGRITY_NONE:
 290                edac_mode = EDAC_NONE;
 291                break;
 292        case I82443BXGX_NBXCFG_INTEGRITY_EC:
 293                edac_mode = EDAC_EC;
 294                break;
 295        case I82443BXGX_NBXCFG_INTEGRITY_ECC:
 296        case I82443BXGX_NBXCFG_INTEGRITY_SCRUB:
 297                edac_mode = EDAC_SECDED;
 298                break;
 299        default:
 300                debugf0("%s(): Unknown/reserved ECC state "
 301                        "in NBXCFG register!\n", __func__);
 302                edac_mode = EDAC_UNKNOWN;
 303                break;
 304        }
 305
 306        i82443bxgx_init_csrows(mci, pdev, edac_mode, mtype);
 307
 308        /* Many BIOSes don't clear error flags on boot, so do this
 309         * here, or we get "phantom" errors occuring at module-load
 310         * time. */
 311        pci_write_bits32(pdev, I82443BXGX_EAP,
 312                        (I82443BXGX_EAP_OFFSET_SBE |
 313                                I82443BXGX_EAP_OFFSET_MBE),
 314                        (I82443BXGX_EAP_OFFSET_SBE |
 315                                I82443BXGX_EAP_OFFSET_MBE));
 316
 317        mci->mod_name = EDAC_MOD_STR;
 318        mci->mod_ver = I82443_REVISION;
 319        mci->ctl_name = "I82443BXGX";
 320        mci->dev_name = pci_name(pdev);
 321        mci->edac_check = i82443bxgx_edacmc_check;
 322        mci->ctl_page_to_phys = NULL;
 323
 324        if (edac_mc_add_mc(mci)) {
 325                debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
 326                goto fail;
 327        }
 328
 329        /* allocating generic PCI control info */
 330        i82443bxgx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
 331        if (!i82443bxgx_pci) {
 332                printk(KERN_WARNING
 333                        "%s(): Unable to create PCI control\n",
 334                        __func__);
 335                printk(KERN_WARNING
 336                        "%s(): PCI error report via EDAC not setup\n",
 337                        __func__);
 338        }
 339
 340        debugf3("MC: " __FILE__ ": %s(): success\n", __func__);
 341        return 0;
 342
 343fail:
 344        edac_mc_free(mci);
 345        return -ENODEV;
 346}
 347
 348EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1);
 349
 350/* returns count (>= 0), or negative on error */
 351static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
 352                                                const struct pci_device_id *ent)
 353{
 354        int rc;
 355
 356        debugf0("MC: " __FILE__ ": %s()\n", __func__);
 357
 358        /* don't need to call pci_device_enable() */
 359        rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data);
 360
 361        if (mci_pdev == NULL)
 362                mci_pdev = pci_dev_get(pdev);
 363
 364        return rc;
 365}
 366
 367static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
 368{
 369        struct mem_ctl_info *mci;
 370
 371        debugf0(__FILE__ ": %s()\n", __func__);
 372
 373        if (i82443bxgx_pci)
 374                edac_pci_release_generic_ctl(i82443bxgx_pci);
 375
 376        if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
 377                return;
 378
 379        edac_mc_free(mci);
 380}
 381
 382EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one);
 383
 384static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = {
 385        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)},
 386        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)},
 387        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)},
 388        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2)},
 389        {0,}                    /* 0 terminated list. */
 390};
 391
 392MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl);
 393
 394static struct pci_driver i82443bxgx_edacmc_driver = {
 395        .name = EDAC_MOD_STR,
 396        .probe = i82443bxgx_edacmc_init_one,
 397        .remove = __devexit_p(i82443bxgx_edacmc_remove_one),
 398        .id_table = i82443bxgx_pci_tbl,
 399};
 400
 401static int __init i82443bxgx_edacmc_init(void)
 402{
 403        int pci_rc;
 404       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
 405       opstate_init();
 406
 407        pci_rc = pci_register_driver(&i82443bxgx_edacmc_driver);
 408        if (pci_rc < 0)
 409                goto fail0;
 410
 411        if (mci_pdev == NULL) {
 412                const struct pci_device_id *id = &i82443bxgx_pci_tbl[0];
 413                int i = 0;
 414                i82443bxgx_registered = 0;
 415
 416                while (mci_pdev == NULL && id->vendor != 0) {
 417                        mci_pdev = pci_get_device(id->vendor,
 418                                        id->device, NULL);
 419                        i++;
 420                        id = &i82443bxgx_pci_tbl[i];
 421                }
 422                if (!mci_pdev) {
 423                        debugf0("i82443bxgx pci_get_device fail\n");
 424                        pci_rc = -ENODEV;
 425                        goto fail1;
 426                }
 427
 428                pci_rc = i82443bxgx_edacmc_init_one(mci_pdev, i82443bxgx_pci_tbl);
 429
 430                if (pci_rc < 0) {
 431                        debugf0("i82443bxgx init fail\n");
 432                        pci_rc = -ENODEV;
 433                        goto fail1;
 434                }
 435        }
 436
 437        return 0;
 438
 439fail1:
 440        pci_unregister_driver(&i82443bxgx_edacmc_driver);
 441
 442fail0:
 443        if (mci_pdev != NULL)
 444                pci_dev_put(mci_pdev);
 445
 446        return pci_rc;
 447}
 448
 449static void __exit i82443bxgx_edacmc_exit(void)
 450{
 451        pci_unregister_driver(&i82443bxgx_edacmc_driver);
 452
 453        if (!i82443bxgx_registered)
 454                i82443bxgx_edacmc_remove_one(mci_pdev);
 455
 456        if (mci_pdev)
 457                pci_dev_put(mci_pdev);
 458}
 459
 460module_init(i82443bxgx_edacmc_init);
 461module_exit(i82443bxgx_edacmc_exit);
 462
 463MODULE_LICENSE("GPL");
 464MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD");
 465MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory controllers");
 466
 467module_param(edac_op_state, int, 0444);
 468MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 469