linux/drivers/edac/edac_mc.c
<<
>>
Prefs
   1/*
   2 * edac_mc kernel module
   3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4 * This file may be distributed under the terms of the
   5 * GNU General Public License.
   6 *
   7 * Written by Thayne Harbaugh
   8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9 *      http://www.anime.net/~goemon/linux-ecc/
  10 *
  11 * Modified by Dave Peterson and Doug Thompson
  12 *
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/proc_fs.h>
  17#include <linux/kernel.h>
  18#include <linux/types.h>
  19#include <linux/smp.h>
  20#include <linux/init.h>
  21#include <linux/sysctl.h>
  22#include <linux/highmem.h>
  23#include <linux/timer.h>
  24#include <linux/slab.h>
  25#include <linux/jiffies.h>
  26#include <linux/spinlock.h>
  27#include <linux/list.h>
  28#include <linux/sysdev.h>
  29#include <linux/ctype.h>
  30#include <linux/edac.h>
  31#include <asm/uaccess.h>
  32#include <asm/page.h>
  33#include <asm/edac.h>
  34#include "edac_core.h"
  35#include "edac_module.h"
  36
  37/* lock to memory controller's control array */
  38static DEFINE_MUTEX(mem_ctls_mutex);
  39static LIST_HEAD(mc_devices);
  40
  41#ifdef CONFIG_EDAC_DEBUG
  42
    /*
     * Debug helper (only built when CONFIG_EDAC_DEBUG is defined): emit the
     * address of one channel_info plus its chan_idx, ce_count, label and
     * csrow back-pointer through debugf4().
     */
  43static void edac_mc_dump_channel(struct channel_info *chan)
  44{
  45        debugf4("\tchannel = %p\n", chan);
  46        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
  47        debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
  48        debugf4("\tchannel->label = '%s'\n", chan->label);
  49        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
  50}
  51
    /*
     * Debug helper (only built when CONFIG_EDAC_DEBUG is defined): emit the
     * address of one csrow_info, its index, page range/mask, page count,
     * channel count, channel array pointer and owning mci through debugf4().
     */
  52static void edac_mc_dump_csrow(struct csrow_info *csrow)
  53{
  54        debugf4("\tcsrow = %p\n", csrow);
  55        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
  56        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
  57        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
  58        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
  59        debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
  60        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
  61        debugf4("\tcsrow->channels = %p\n", csrow->channels);
  62        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
  63}
  64
    /*
     * Debug helper (only built when CONFIG_EDAC_DEBUG is defined): emit the
     * main fields of a mem_ctl_info (capability masks, csrow array, device,
     * module/controller names, private data pointer).
     *
     * Everything goes through debugf3() except the edac_check pointer, which
     * uses debugf4().
     */
  65static void edac_mc_dump_mci(struct mem_ctl_info *mci)
  66{
  67        debugf3("\tmci = %p\n", mci);
  68        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
  69        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
  70        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
  71        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
  72        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
  73                mci->nr_csrows, mci->csrows);
  74        debugf3("\tdev = %p\n", mci->dev);
  75        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
  76        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
  77}
  78
  79#endif                          /* CONFIG_EDAC_DEBUG */
  80
  81/*
  82 * keep those in sync with the enum mem_type
  83 */
    /*
     * Human-readable names for memory types, exported for use by other
     * GPL modules.  The entries are positional: presumably entry N names
     * the enum mem_type constant whose value is N, so a new enum value
     * needs a new string at the matching position -- verify against the
     * enum definition when editing.
     */
  84const char *edac_mem_types[] = {
  85        "Empty csrow",
  86        "Reserved csrow type",
  87        "Unknown csrow type",
  88        "Fast page mode RAM",
  89        "Extended data out RAM",
  90        "Burst Extended data out RAM",
  91        "Single data rate SDRAM",
  92        "Registered single data rate SDRAM",
  93        "Double data rate SDRAM",
  94        "Registered Double data rate SDRAM",
  95        "Rambus DRAM",
  96        "Unbuffered DDR2 RAM",
  97        "Fully buffered DDR2",
  98        "Registered DDR2 RAM",
  99        "Rambus XDR",
 100        "Unbuffered DDR3 RAM",
 101        "Registered DDR3 RAM",
 102};
 103EXPORT_SYMBOL_GPL(edac_mem_types);
 104
 105/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 106 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 107 * compiler would provide for X and return the aligned result.
 108 *
 109 * If 'size' is a constant, the compiler will optimize this whole function
 110 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 111 */
 112void *edac_align_ptr(void *ptr, unsigned size)
 113{
 114        unsigned align, r;
 115
 116        /* Here we assume that the alignment of a "long long" is the most
 117         * stringent alignment that the compiler will ever provide by default.
 118         * As far as I know, this is a reasonable assumption.
 119         */
 120        if (size > sizeof(long))
 121                align = sizeof(long long);
 122        else if (size > sizeof(int))
 123                align = sizeof(long);
 124        else if (size > sizeof(short))
 125                align = sizeof(int);
 126        else if (size > sizeof(char))
 127                align = sizeof(short);
 128        else
 129                return (char *)ptr;
 130
 131        r = size % align;
 132
 133        if (r == 0)
 134                return (char *)ptr;
 135
 136        return (void *)(((unsigned long)ptr) + align - r);
 137}
 138
 139/**
 140 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 141 * @size_pvt:   size of private storage needed
 142 * @nr_csrows:  Number of CWROWS needed for this MC
 143 * @nr_chans:   Number of channels for the MC
 144 *
 145 * Everything is kmalloc'ed as one big chunk - more efficient.
 146 * Only can be used if all structures have the same lifetime - otherwise
 147 * you have to allocate and initialize your own structures.
 148 *
 149 * Use edac_mc_free() to free mc structures allocated by this function.
 150 *
 151 * Returns:
 152 *      NULL allocation failed
 153 *      struct mem_ctl_info pointer
 154 */
 155struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 156                                unsigned nr_chans, int edac_index)
 157{
 158        struct mem_ctl_info *mci;
 159        struct csrow_info *csi, *csrow;
 160        struct channel_info *chi, *chp, *chan;
 161        void *pvt;
 162        unsigned size;
 163        int row, chn;
 164        int err;
 165
 166        /* Figure out the offsets of the various items from the start of an mc
 167         * structure.  We want the alignment of each item to be at least as
 168         * stringent as what the compiler would provide if we could simply
 169         * hardcode everything into a single struct.
 170         */
 171        mci = (struct mem_ctl_info *)0;
 172        csi = edac_align_ptr(&mci[1], sizeof(*csi));
 173        chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
 174        pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
 175        size = ((unsigned long)pvt) + sz_pvt;
 176
 177        mci = kzalloc(size, GFP_KERNEL);
 178        if (mci == NULL)
 179                return NULL;
 180
 181        /* Adjust pointers so they point within the memory we just allocated
 182         * rather than an imaginary chunk of memory located at address 0.
 183         */
 184        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 185        chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
 186        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 187
 188        /* setup index and various internal pointers */
 189        mci->mc_idx = edac_index;
 190        mci->csrows = csi;
 191        mci->pvt_info = pvt;
 192        mci->nr_csrows = nr_csrows;
 193
 194        for (row = 0; row < nr_csrows; row++) {
 195                csrow = &csi[row];
 196                csrow->csrow_idx = row;
 197                csrow->mci = mci;
 198                csrow->nr_channels = nr_chans;
 199                chp = &chi[row * nr_chans];
 200                csrow->channels = chp;
 201
 202                for (chn = 0; chn < nr_chans; chn++) {
 203                        chan = &chp[chn];
 204                        chan->chan_idx = chn;
 205                        chan->csrow = csrow;
 206                }
 207        }
 208
 209        mci->op_state = OP_ALLOC;
 210        INIT_LIST_HEAD(&mci->grp_kobj_list);
 211
 212        /*
 213         * Initialize the 'root' kobj for the edac_mc controller
 214         */
 215        err = edac_mc_register_sysfs_main_kobj(mci);
 216        if (err) {
 217                kfree(mci);
 218                return NULL;
 219        }
 220
 221        /* at this point, the root kobj is valid, and in order to
 222         * 'free' the object, then the function:
 223         *      edac_mc_unregister_sysfs_main_kobj() must be called
 224         * which will perform kobj unregistration and the actual free
 225         * will occur during the kobject callback operation
 226         */
 227        return mci;
 228}
 229EXPORT_SYMBOL_GPL(edac_mc_alloc);
 230
 231/**
 232 * edac_mc_free
 233 *      'Free' a previously allocated 'mci' structure
 234 * @mci: pointer to a struct mem_ctl_info structure
 235 */
    /*
     * Tear down an mci: first unregister its main sysfs kobject, then
     * release the memory of the mci itself with kfree().
     *
     * NOTE(review): the comment at the end of edac_mc_alloc() says the
     * actual free happens in the kobject callback, while this function
     * also calls kfree(mci) -- confirm the kobject release path does not
     * free the same memory.
     */
 236void edac_mc_free(struct mem_ctl_info *mci)
 237{
 238        debugf1("%s()\n", __func__);
 239
 240        edac_mc_unregister_sysfs_main_kobj(mci);
 241
 242        /* free the mci instance memory here */
 243        kfree(mci);
 244}
 245EXPORT_SYMBOL_GPL(edac_mc_free);
 246
 247
 248/**
 249 * find_mci_by_dev
 250 *
 251 *      scan list of controllers looking for the one that manages
 252 *      the 'dev' device
 253 * @dev: pointer to a struct device related with the MCI
 254 */
 255struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 256{
 257        struct mem_ctl_info *mci;
 258        struct list_head *item;
 259
 260        debugf3("%s()\n", __func__);
 261
 262        list_for_each(item, &mc_devices) {
 263                mci = list_entry(item, struct mem_ctl_info, link);
 264
 265                if (mci->dev == dev)
 266                        return mci;
 267        }
 268
 269        return NULL;
 270}
 271EXPORT_SYMBOL_GPL(find_mci_by_dev);
 272
 273/*
 274 * handler for EDAC to check if NMI type handler has asserted interrupt
 275 */
 276static int edac_mc_assert_error_check_and_clear(void)
 277{
 278        int old_state;
 279
 280        if (edac_op_state == EDAC_OPSTATE_POLL)
 281                return 1;
 282
 283        old_state = edac_err_assert;
 284        edac_err_assert = 0;
 285
 286        return old_state;
 287}
 288
 289/*
 290 * edac_mc_workq_function
 291 *      performs the operation scheduled by a workq request
 292 */
 293static void edac_mc_workq_function(struct work_struct *work_req)
 294{
 295        struct delayed_work *d_work = to_delayed_work(work_req);
 296        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 297
 298        mutex_lock(&mem_ctls_mutex);
 299
 300        /* if this control struct has movd to offline state, we are done */
 301        if (mci->op_state == OP_OFFLINE) {
 302                mutex_unlock(&mem_ctls_mutex);
 303                return;
 304        }
 305
 306        /* Only poll controllers that are running polled and have a check */
 307        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 308                mci->edac_check(mci);
 309
 310        mutex_unlock(&mem_ctls_mutex);
 311
 312        /* Reschedule */
 313        queue_delayed_work(edac_workqueue, &mci->work,
 314                        msecs_to_jiffies(edac_mc_get_poll_msec()));
 315}
 316
 317/*
 318 * edac_mc_workq_setup
 319 *      initialize a workq item for this mci
 320 *      passing in the new delay period in msec
 321 *
 322 *      locking model:
 323 *
 324 *              called with the mem_ctls_mutex held
 325 */
 326static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 327{
 328        debugf0("%s()\n", __func__);
 329
 330        /* if this instance is not in the POLL state, then simply return */
 331        if (mci->op_state != OP_RUNNING_POLL)
 332                return;
 333
 334        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 335        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 336}
 337
 338/*
 339 * edac_mc_workq_teardown
 340 *      stop the workq processing on this mci
 341 *
 342 *      locking model:
 343 *
 344 *              called WITHOUT lock held
 345 */
 346static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 347{
 348        int status;
 349
 350        if (mci->op_state != OP_RUNNING_POLL)
 351                return;
 352
 353        status = cancel_delayed_work(&mci->work);
 354        if (status == 0) {
 355                debugf0("%s() not canceled, flush the queue\n",
 356                        __func__);
 357
 358                /* workq instance might be running, wait for it */
 359                flush_workqueue(edac_workqueue);
 360        }
 361}
 362
 363/*
 364 * edac_mc_reset_delay_period(unsigned long value)
 365 *
 366 *      user space has updated our poll period value, need to
 367 *      reset our workq delays
 368 */
 369void edac_mc_reset_delay_period(int value)
 370{
 371        struct mem_ctl_info *mci;
 372        struct list_head *item;
 373
 374        mutex_lock(&mem_ctls_mutex);
 375
 376        /* scan the list and turn off all workq timers, doing so under lock
 377         */
 378        list_for_each(item, &mc_devices) {
 379                mci = list_entry(item, struct mem_ctl_info, link);
 380
 381                if (mci->op_state == OP_RUNNING_POLL)
 382                        cancel_delayed_work(&mci->work);
 383        }
 384
 385        mutex_unlock(&mem_ctls_mutex);
 386
 387
 388        /* re-walk the list, and reset the poll delay */
 389        mutex_lock(&mem_ctls_mutex);
 390
 391        list_for_each(item, &mc_devices) {
 392                mci = list_entry(item, struct mem_ctl_info, link);
 393
 394                edac_mc_workq_setup(mci, (unsigned long) value);
 395        }
 396
 397        mutex_unlock(&mem_ctls_mutex);
 398}
 399
 400
 401
 402/* Return 0 on success, 1 on failure.
 403 * Before calling this function, caller must
 404 * assign a unique value to mci->mc_idx.
 405 *
 406 *      locking model:
 407 *
 408 *              called with the mem_ctls_mutex lock held
 409 */
 410static int add_mc_to_global_list(struct mem_ctl_info *mci)
 411{
 412        struct list_head *item, *insert_before;
 413        struct mem_ctl_info *p;
 414
 415        insert_before = &mc_devices;
 416
 417        p = find_mci_by_dev(mci->dev);
 418        if (unlikely(p != NULL))
 419                goto fail0;
 420
 421        list_for_each(item, &mc_devices) {
 422                p = list_entry(item, struct mem_ctl_info, link);
 423
 424                if (p->mc_idx >= mci->mc_idx) {
 425                        if (unlikely(p->mc_idx == mci->mc_idx))
 426                                goto fail1;
 427
 428                        insert_before = item;
 429                        break;
 430                }
 431        }
 432
 433        list_add_tail_rcu(&mci->link, insert_before);
 434        atomic_inc(&edac_handlers);
 435        return 0;
 436
 437fail0:
 438        edac_printk(KERN_WARNING, EDAC_MC,
 439                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 440                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 441        return 1;
 442
 443fail1:
 444        edac_printk(KERN_WARNING, EDAC_MC,
 445                "bug in low-level driver: attempt to assign\n"
 446                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 447        return 1;
 448}
 449
 450static void complete_mc_list_del(struct rcu_head *head)
 451{
 452        struct mem_ctl_info *mci;
 453
 454        mci = container_of(head, struct mem_ctl_info, rcu);
 455        INIT_LIST_HEAD(&mci->link);
 456}
 457
    /*
     * Take 'mci' off the global controller list: drop the edac_handlers
     * count, unlink the entry with list_del_rcu(), and queue
     * complete_mc_list_del() through call_rcu() to reset the link.
     * rcu_barrier() is called before returning, presumably so that the
     * queued callback has run by the time the caller continues.
     *
     * Both callers in this file invoke it with mem_ctls_mutex held.
     */
 458static void del_mc_from_global_list(struct mem_ctl_info *mci)
 459{
 460        atomic_dec(&edac_handlers);
 461        list_del_rcu(&mci->link);
 462        call_rcu(&mci->rcu, complete_mc_list_del);
 463        rcu_barrier();
 464}
 465
 466/**
 467 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 468 *
 469 * If found, return a pointer to the structure.
 470 * Else return NULL.
 471 *
 472 * Caller must hold mem_ctls_mutex.
 473 */
 474struct mem_ctl_info *edac_mc_find(int idx)
 475{
 476        struct list_head *item;
 477        struct mem_ctl_info *mci;
 478
 479        list_for_each(item, &mc_devices) {
 480                mci = list_entry(item, struct mem_ctl_info, link);
 481
 482                if (mci->mc_idx >= idx) {
 483                        if (mci->mc_idx == idx)
 484                                return mci;
 485
 486                        break;
 487                }
 488        }
 489
 490        return NULL;
 491}
 492EXPORT_SYMBOL(edac_mc_find);
 493
 494/**
 495 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 496 *                 create sysfs entries associated with mci structure
 497 * @mci: pointer to the mci structure to be added to the list
 498 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
 499 *
 500 * Return:
 501 *      0       Success
 502 *      !0      Failure
 503 */
 504
 505/* FIXME - should a warning be printed if no error detection? correction? */
 506int edac_mc_add_mc(struct mem_ctl_info *mci)
 507{
 508        debugf0("%s()\n", __func__);
 509
 510#ifdef CONFIG_EDAC_DEBUG
 511        if (edac_debug_level >= 3)
 512                edac_mc_dump_mci(mci);
 513
 514        if (edac_debug_level >= 4) {
 515                int i;
 516
 517                for (i = 0; i < mci->nr_csrows; i++) {
 518                        int j;
 519
 520                        edac_mc_dump_csrow(&mci->csrows[i]);
 521                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
 522                                edac_mc_dump_channel(&mci->csrows[i].
 523                                                channels[j]);
 524                }
 525        }
 526#endif
 527        mutex_lock(&mem_ctls_mutex);
 528
 529        if (add_mc_to_global_list(mci))
 530                goto fail0;
 531
 532        /* set load time so that error rate can be tracked */
 533        mci->start_time = jiffies;
 534
 535        if (edac_create_sysfs_mci_device(mci)) {
 536                edac_mc_printk(mci, KERN_WARNING,
 537                        "failed to create sysfs device\n");
 538                goto fail1;
 539        }
 540
 541        /* If there IS a check routine, then we are running POLLED */
 542        if (mci->edac_check != NULL) {
 543                /* This instance is NOW RUNNING */
 544                mci->op_state = OP_RUNNING_POLL;
 545
 546                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 547        } else {
 548                mci->op_state = OP_RUNNING_INTERRUPT;
 549        }
 550
 551        /* Report action taken */
 552        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 553                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 554
 555        mutex_unlock(&mem_ctls_mutex);
 556        return 0;
 557
 558fail1:
 559        del_mc_from_global_list(mci);
 560
 561fail0:
 562        mutex_unlock(&mem_ctls_mutex);
 563        return 1;
 564}
 565EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 566
 567/**
 568 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 569 *                 remove mci structure from global list
 570 * @pdev: Pointer to 'struct device' representing mci structure to remove.
 571 *
 572 * Return pointer to removed mci structure, or NULL if device not found.
 573 */
 574struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 575{
 576        struct mem_ctl_info *mci;
 577
 578        debugf0("%s()\n", __func__);
 579
 580        mutex_lock(&mem_ctls_mutex);
 581
 582        /* find the requested mci struct in the global list */
 583        mci = find_mci_by_dev(dev);
 584        if (mci == NULL) {
 585                mutex_unlock(&mem_ctls_mutex);
 586                return NULL;
 587        }
 588
 589        del_mc_from_global_list(mci);
 590        mutex_unlock(&mem_ctls_mutex);
 591
 592        /* flush workq processes */
 593        edac_mc_workq_teardown(mci);
 594
 595        /* marking MCI offline */
 596        mci->op_state = OP_OFFLINE;
 597
 598        /* remove from sysfs */
 599        edac_remove_sysfs_mci_device(mci);
 600
 601        edac_printk(KERN_INFO, EDAC_MC,
 602                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 603                mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 604
 605        return mci;
 606}
 607EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 608
 609static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 610                                u32 size)
 611{
 612        struct page *pg;
 613        void *virt_addr;
 614        unsigned long flags = 0;
 615
 616        debugf3("%s()\n", __func__);
 617
 618        /* ECC error page was not in our memory. Ignore it. */
 619        if (!pfn_valid(page))
 620                return;
 621
 622        /* Find the actual page structure then map it and fix */
 623        pg = pfn_to_page(page);
 624
 625        if (PageHighMem(pg))
 626                local_irq_save(flags);
 627
 628        virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
 629
 630        /* Perform architecture specific atomic scrub operation */
 631        atomic_scrub(virt_addr + offset, size);
 632
 633        /* Unmap and complete */
 634        kunmap_atomic(virt_addr, KM_BOUNCE_READ);
 635
 636        if (PageHighMem(pg))
 637                local_irq_restore(flags);
 638}
 639
 640/* FIXME - should return -1 */
 641int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 642{
 643        struct csrow_info *csrows = mci->csrows;
 644        int row, i;
 645
 646        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 647        row = -1;
 648
 649        for (i = 0; i < mci->nr_csrows; i++) {
 650                struct csrow_info *csrow = &csrows[i];
 651
 652                if (csrow->nr_pages == 0)
 653                        continue;
 654
 655                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 656                        "mask(0x%lx)\n", mci->mc_idx, __func__,
 657                        csrow->first_page, page, csrow->last_page,
 658                        csrow->page_mask);
 659
 660                if ((page >= csrow->first_page) &&
 661                    (page <= csrow->last_page) &&
 662                    ((page & csrow->page_mask) ==
 663                     (csrow->first_page & csrow->page_mask))) {
 664                        row = i;
 665                        break;
 666                }
 667        }
 668
 669        if (row == -1)
 670                edac_mc_printk(mci, KERN_ERR,
 671                        "could not look up page error address %lx\n",
 672                        (unsigned long)page);
 673
 674        return row;
 675}
 676EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 677
 678/* FIXME - setable log (warning/emerg) levels */
 679/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
 680void edac_mc_handle_ce(struct mem_ctl_info *mci,
 681                unsigned long page_frame_number,
 682                unsigned long offset_in_page, unsigned long syndrome,
 683                int row, int channel, const char *msg)
 684{
 685        unsigned long remapped_page;
 686
 687        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 688
 689        /* FIXME - maybe make panic on INTERNAL ERROR an option */
 690        if (row >= mci->nr_csrows || row < 0) {
 691                /* something is wrong */
 692                edac_mc_printk(mci, KERN_ERR,
 693                        "INTERNAL ERROR: row out of range "
 694                        "(%d >= %d)\n", row, mci->nr_csrows);
 695                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 696                return;
 697        }
 698
 699        if (channel >= mci->csrows[row].nr_channels || channel < 0) {
 700                /* something is wrong */
 701                edac_mc_printk(mci, KERN_ERR,
 702                        "INTERNAL ERROR: channel out of range "
 703                        "(%d >= %d)\n", channel,
 704                        mci->csrows[row].nr_channels);
 705                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 706                return;
 707        }
 708
 709        if (edac_mc_get_log_ce())
 710                /* FIXME - put in DIMM location */
 711                edac_mc_printk(mci, KERN_WARNING,
 712                        "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
 713                        "0x%lx, row %d, channel %d, label \"%s\": %s\n",
 714                        page_frame_number, offset_in_page,
 715                        mci->csrows[row].grain, syndrome, row, channel,
 716                        mci->csrows[row].channels[channel].label, msg);
 717
 718        mci->ce_count++;
 719        mci->csrows[row].ce_count++;
 720        mci->csrows[row].channels[channel].ce_count++;
 721
 722        if (mci->scrub_mode & SCRUB_SW_SRC) {
 723                /*
 724                 * Some MC's can remap memory so that it is still available
 725                 * at a different address when PCI devices map into memory.
 726                 * MC's that can't do this lose the memory where PCI devices
 727                 * are mapped.  This mapping is MC dependant and so we call
 728                 * back into the MC driver for it to map the MC page to
 729                 * a physical (CPU) page which can then be mapped to a virtual
 730                 * page - which can then be scrubbed.
 731                 */
 732                remapped_page = mci->ctl_page_to_phys ?
 733                        mci->ctl_page_to_phys(mci, page_frame_number) :
 734                        page_frame_number;
 735
 736                edac_mc_scrub_block(remapped_page, offset_in_page,
 737                                mci->csrows[row].grain);
 738        }
 739}
 740EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
 741
 742void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
 743{
 744        if (edac_mc_get_log_ce())
 745                edac_mc_printk(mci, KERN_WARNING,
 746                        "CE - no information available: %s\n", msg);
 747
 748        mci->ce_noinfo_count++;
 749        mci->ce_count++;
 750}
 751EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
 752
 753void edac_mc_handle_ue(struct mem_ctl_info *mci,
 754                unsigned long page_frame_number,
 755                unsigned long offset_in_page, int row, const char *msg)
 756{
 757        int len = EDAC_MC_LABEL_LEN * 4;
 758        char labels[len + 1];
 759        char *pos = labels;
 760        int chan;
 761        int chars;
 762
 763        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 764
 765        /* FIXME - maybe make panic on INTERNAL ERROR an option */
 766        if (row >= mci->nr_csrows || row < 0) {
 767                /* something is wrong */
 768                edac_mc_printk(mci, KERN_ERR,
 769                        "INTERNAL ERROR: row out of range "
 770                        "(%d >= %d)\n", row, mci->nr_csrows);
 771                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 772                return;
 773        }
 774
 775        chars = snprintf(pos, len + 1, "%s",
 776                         mci->csrows[row].channels[0].label);
 777        len -= chars;
 778        pos += chars;
 779
 780        for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
 781                chan++) {
 782                chars = snprintf(pos, len + 1, ":%s",
 783                                 mci->csrows[row].channels[chan].label);
 784                len -= chars;
 785                pos += chars;
 786        }
 787
 788        if (edac_mc_get_log_ue())
 789                edac_mc_printk(mci, KERN_EMERG,
 790                        "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
 791                        "labels \"%s\": %s\n", page_frame_number,
 792                        offset_in_page, mci->csrows[row].grain, row,
 793                        labels, msg);
 794
 795        if (edac_mc_get_panic_on_ue())
 796                panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
 797                        "row %d, labels \"%s\": %s\n", mci->mc_idx,
 798                        page_frame_number, offset_in_page,
 799                        mci->csrows[row].grain, row, labels, msg);
 800
 801        mci->ue_count++;
 802        mci->csrows[row].ue_count++;
 803}
 804EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
 805
 806void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
 807{
 808        if (edac_mc_get_panic_on_ue())
 809                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
 810
 811        if (edac_mc_get_log_ue())
 812                edac_mc_printk(mci, KERN_WARNING,
 813                        "UE - no information available: %s\n", msg);
 814        mci->ue_noinfo_count++;
 815        mci->ue_count++;
 816}
 817EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
 818
 819/*************************************************************
 820 * On Fully Buffered DIMM modules, this help function is
 821 * called to process UE events
 822 */
 823void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 824                        unsigned int csrow,
 825                        unsigned int channela,
 826                        unsigned int channelb, char *msg)
 827{
 828        int len = EDAC_MC_LABEL_LEN * 4;
 829        char labels[len + 1];
 830        char *pos = labels;
 831        int chars;
 832
 833        if (csrow >= mci->nr_csrows) {
 834                /* something is wrong */
 835                edac_mc_printk(mci, KERN_ERR,
 836                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
 837                        csrow, mci->nr_csrows);
 838                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 839                return;
 840        }
 841
 842        if (channela >= mci->csrows[csrow].nr_channels) {
 843                /* something is wrong */
 844                edac_mc_printk(mci, KERN_ERR,
 845                        "INTERNAL ERROR: channel-a out of range "
 846                        "(%d >= %d)\n",
 847                        channela, mci->csrows[csrow].nr_channels);
 848                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 849                return;
 850        }
 851
 852        if (channelb >= mci->csrows[csrow].nr_channels) {
 853                /* something is wrong */
 854                edac_mc_printk(mci, KERN_ERR,
 855                        "INTERNAL ERROR: channel-b out of range "
 856                        "(%d >= %d)\n",
 857                        channelb, mci->csrows[csrow].nr_channels);
 858                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 859                return;
 860        }
 861
 862        mci->ue_count++;
 863        mci->csrows[csrow].ue_count++;
 864
 865        /* Generate the DIMM labels from the specified channels */
 866        chars = snprintf(pos, len + 1, "%s",
 867                         mci->csrows[csrow].channels[channela].label);
 868        len -= chars;
 869        pos += chars;
 870        chars = snprintf(pos, len + 1, "-%s",
 871                         mci->csrows[csrow].channels[channelb].label);
 872
 873        if (edac_mc_get_log_ue())
 874                edac_mc_printk(mci, KERN_EMERG,
 875                        "UE row %d, channel-a= %d channel-b= %d "
 876                        "labels \"%s\": %s\n", csrow, channela, channelb,
 877                        labels, msg);
 878
 879        if (edac_mc_get_panic_on_ue())
 880                panic("UE row %d, channel-a= %d channel-b= %d "
 881                        "labels \"%s\": %s\n", csrow, channela,
 882                        channelb, labels, msg);
 883}
 884EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
 885
 886/*************************************************************
 887 * On Fully Buffered DIMM modules, this help function is
 888 * called to process CE events
 889 */
 890void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 891                        unsigned int csrow, unsigned int channel, char *msg)
 892{
 893
 894        /* Ensure boundary values */
 895        if (csrow >= mci->nr_csrows) {
 896                /* something is wrong */
 897                edac_mc_printk(mci, KERN_ERR,
 898                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
 899                        csrow, mci->nr_csrows);
 900                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 901                return;
 902        }
 903        if (channel >= mci->csrows[csrow].nr_channels) {
 904                /* something is wrong */
 905                edac_mc_printk(mci, KERN_ERR,
 906                        "INTERNAL ERROR: channel out of range (%d >= %d)\n",
 907                        channel, mci->csrows[csrow].nr_channels);
 908                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 909                return;
 910        }
 911
 912        if (edac_mc_get_log_ce())
 913                /* FIXME - put in DIMM location */
 914                edac_mc_printk(mci, KERN_WARNING,
 915                        "CE row %d, channel %d, label \"%s\": %s\n",
 916                        csrow, channel,
 917                        mci->csrows[csrow].channels[channel].label, msg);
 918
 919        mci->ce_count++;
 920        mci->csrows[csrow].ce_count++;
 921        mci->csrows[csrow].channels[channel].ce_count++;
 922}
 923EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
 924