linux/drivers/edac/edac_mc.c
<<
>>
Prefs
   1/*
   2 * edac_mc kernel module
   3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4 * This file may be distributed under the terms of the
   5 * GNU General Public License.
   6 *
   7 * Written by Thayne Harbaugh
   8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9 *      http://www.anime.net/~goemon/linux-ecc/
  10 *
  11 * Modified by Dave Peterson and Doug Thompson
  12 *
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/proc_fs.h>
  17#include <linux/kernel.h>
  18#include <linux/types.h>
  19#include <linux/smp.h>
  20#include <linux/init.h>
  21#include <linux/sysctl.h>
  22#include <linux/highmem.h>
  23#include <linux/timer.h>
  24#include <linux/slab.h>
  25#include <linux/jiffies.h>
  26#include <linux/spinlock.h>
  27#include <linux/list.h>
  28#include <linux/sysdev.h>
  29#include <linux/ctype.h>
  30#include <linux/edac.h>
  31#include <asm/uaccess.h>
  32#include <asm/page.h>
  33#include <asm/edac.h>
  34#include "edac_core.h"
  35#include "edac_module.h"
  36
  37/* lock to memory controller's control array */
  38static DEFINE_MUTEX(mem_ctls_mutex);
  39static LIST_HEAD(mc_devices);
  40
  41#ifdef CONFIG_EDAC_DEBUG
  42
  43static void edac_mc_dump_channel(struct channel_info *chan)
  44{
  45        debugf4("\tchannel = %p\n", chan);
  46        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
  47        debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
  48        debugf4("\tchannel->label = '%s'\n", chan->label);
  49        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
  50}
  51
  52static void edac_mc_dump_csrow(struct csrow_info *csrow)
  53{
  54        debugf4("\tcsrow = %p\n", csrow);
  55        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
  56        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
  57        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
  58        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
  59        debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
  60        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
  61        debugf4("\tcsrow->channels = %p\n", csrow->channels);
  62        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
  63}
  64
  65static void edac_mc_dump_mci(struct mem_ctl_info *mci)
  66{
  67        debugf3("\tmci = %p\n", mci);
  68        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
  69        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
  70        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
  71        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
  72        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
  73                mci->nr_csrows, mci->csrows);
  74        debugf3("\tdev = %p\n", mci->dev);
  75        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
  76        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
  77}
  78
  79#endif                          /* CONFIG_EDAC_DEBUG */
  80
  81/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
  82 * Adjust 'ptr' so that its alignment is at least as stringent as what the
  83 * compiler would provide for X and return the aligned result.
  84 *
  85 * If 'size' is a constant, the compiler will optimize this whole function
  86 * down to either a no-op or the addition of a constant to the value of 'ptr'.
  87 */
  88void *edac_align_ptr(void *ptr, unsigned size)
  89{
  90        unsigned align, r;
  91
  92        /* Here we assume that the alignment of a "long long" is the most
  93         * stringent alignment that the compiler will ever provide by default.
  94         * As far as I know, this is a reasonable assumption.
  95         */
  96        if (size > sizeof(long))
  97                align = sizeof(long long);
  98        else if (size > sizeof(int))
  99                align = sizeof(long);
 100        else if (size > sizeof(short))
 101                align = sizeof(int);
 102        else if (size > sizeof(char))
 103                align = sizeof(short);
 104        else
 105                return (char *)ptr;
 106
 107        r = size % align;
 108
 109        if (r == 0)
 110                return (char *)ptr;
 111
 112        return (void *)(((unsigned long)ptr) + align - r);
 113}
 114
 115/**
 116 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 117 * @size_pvt:   size of private storage needed
 118 * @nr_csrows:  Number of CWROWS needed for this MC
 119 * @nr_chans:   Number of channels for the MC
 120 *
 121 * Everything is kmalloc'ed as one big chunk - more efficient.
 122 * Only can be used if all structures have the same lifetime - otherwise
 123 * you have to allocate and initialize your own structures.
 124 *
 125 * Use edac_mc_free() to free mc structures allocated by this function.
 126 *
 127 * Returns:
 128 *      NULL allocation failed
 129 *      struct mem_ctl_info pointer
 130 */
 131struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 132                                unsigned nr_chans, int edac_index)
 133{
 134        struct mem_ctl_info *mci;
 135        struct csrow_info *csi, *csrow;
 136        struct channel_info *chi, *chp, *chan;
 137        void *pvt;
 138        unsigned size;
 139        int row, chn;
 140        int err;
 141
 142        /* Figure out the offsets of the various items from the start of an mc
 143         * structure.  We want the alignment of each item to be at least as
 144         * stringent as what the compiler would provide if we could simply
 145         * hardcode everything into a single struct.
 146         */
 147        mci = (struct mem_ctl_info *)0;
 148        csi = edac_align_ptr(&mci[1], sizeof(*csi));
 149        chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
 150        pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
 151        size = ((unsigned long)pvt) + sz_pvt;
 152
 153        mci = kzalloc(size, GFP_KERNEL);
 154        if (mci == NULL)
 155                return NULL;
 156
 157        /* Adjust pointers so they point within the memory we just allocated
 158         * rather than an imaginary chunk of memory located at address 0.
 159         */
 160        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 161        chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
 162        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 163
 164        /* setup index and various internal pointers */
 165        mci->mc_idx = edac_index;
 166        mci->csrows = csi;
 167        mci->pvt_info = pvt;
 168        mci->nr_csrows = nr_csrows;
 169
 170        for (row = 0; row < nr_csrows; row++) {
 171                csrow = &csi[row];
 172                csrow->csrow_idx = row;
 173                csrow->mci = mci;
 174                csrow->nr_channels = nr_chans;
 175                chp = &chi[row * nr_chans];
 176                csrow->channels = chp;
 177
 178                for (chn = 0; chn < nr_chans; chn++) {
 179                        chan = &chp[chn];
 180                        chan->chan_idx = chn;
 181                        chan->csrow = csrow;
 182                }
 183        }
 184
 185        mci->op_state = OP_ALLOC;
 186
 187        /*
 188         * Initialize the 'root' kobj for the edac_mc controller
 189         */
 190        err = edac_mc_register_sysfs_main_kobj(mci);
 191        if (err) {
 192                kfree(mci);
 193                return NULL;
 194        }
 195
 196        /* at this point, the root kobj is valid, and in order to
 197         * 'free' the object, then the function:
 198         *      edac_mc_unregister_sysfs_main_kobj() must be called
 199         * which will perform kobj unregistration and the actual free
 200         * will occur during the kobject callback operation
 201         */
 202        return mci;
 203}
 204EXPORT_SYMBOL_GPL(edac_mc_alloc);
 205
 206/**
 207 * edac_mc_free
 208 *      'Free' a previously allocated 'mci' structure
 209 * @mci: pointer to a struct mem_ctl_info structure
 210 */
 211void edac_mc_free(struct mem_ctl_info *mci)
 212{
 213        edac_mc_unregister_sysfs_main_kobj(mci);
 214}
 215EXPORT_SYMBOL_GPL(edac_mc_free);
 216
 217
 218/*
 219 * find_mci_by_dev
 220 *
 221 *      scan list of controllers looking for the one that manages
 222 *      the 'dev' device
 223 */
 224static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 225{
 226        struct mem_ctl_info *mci;
 227        struct list_head *item;
 228
 229        debugf3("%s()\n", __func__);
 230
 231        list_for_each(item, &mc_devices) {
 232                mci = list_entry(item, struct mem_ctl_info, link);
 233
 234                if (mci->dev == dev)
 235                        return mci;
 236        }
 237
 238        return NULL;
 239}
 240
 241/*
 242 * handler for EDAC to check if NMI type handler has asserted interrupt
 243 */
 244static int edac_mc_assert_error_check_and_clear(void)
 245{
 246        int old_state;
 247
 248        if (edac_op_state == EDAC_OPSTATE_POLL)
 249                return 1;
 250
 251        old_state = edac_err_assert;
 252        edac_err_assert = 0;
 253
 254        return old_state;
 255}
 256
 257/*
 258 * edac_mc_workq_function
 259 *      performs the operation scheduled by a workq request
 260 */
 261static void edac_mc_workq_function(struct work_struct *work_req)
 262{
 263        struct delayed_work *d_work = to_delayed_work(work_req);
 264        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 265
 266        mutex_lock(&mem_ctls_mutex);
 267
 268        /* if this control struct has movd to offline state, we are done */
 269        if (mci->op_state == OP_OFFLINE) {
 270                mutex_unlock(&mem_ctls_mutex);
 271                return;
 272        }
 273
 274        /* Only poll controllers that are running polled and have a check */
 275        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 276                mci->edac_check(mci);
 277
 278        mutex_unlock(&mem_ctls_mutex);
 279
 280        /* Reschedule */
 281        queue_delayed_work(edac_workqueue, &mci->work,
 282                        msecs_to_jiffies(edac_mc_get_poll_msec()));
 283}
 284
 285/*
 286 * edac_mc_workq_setup
 287 *      initialize a workq item for this mci
 288 *      passing in the new delay period in msec
 289 *
 290 *      locking model:
 291 *
 292 *              called with the mem_ctls_mutex held
 293 */
 294static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 295{
 296        debugf0("%s()\n", __func__);
 297
 298        /* if this instance is not in the POLL state, then simply return */
 299        if (mci->op_state != OP_RUNNING_POLL)
 300                return;
 301
 302        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 303        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 304}
 305
 306/*
 307 * edac_mc_workq_teardown
 308 *      stop the workq processing on this mci
 309 *
 310 *      locking model:
 311 *
 312 *              called WITHOUT lock held
 313 */
 314static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 315{
 316        int status;
 317
 318        status = cancel_delayed_work(&mci->work);
 319        if (status == 0) {
 320                debugf0("%s() not canceled, flush the queue\n",
 321                        __func__);
 322
 323                /* workq instance might be running, wait for it */
 324                flush_workqueue(edac_workqueue);
 325        }
 326}
 327
 328/*
 329 * edac_mc_reset_delay_period(unsigned long value)
 330 *
 331 *      user space has updated our poll period value, need to
 332 *      reset our workq delays
 333 */
 334void edac_mc_reset_delay_period(int value)
 335{
 336        struct mem_ctl_info *mci;
 337        struct list_head *item;
 338
 339        mutex_lock(&mem_ctls_mutex);
 340
 341        /* scan the list and turn off all workq timers, doing so under lock
 342         */
 343        list_for_each(item, &mc_devices) {
 344                mci = list_entry(item, struct mem_ctl_info, link);
 345
 346                if (mci->op_state == OP_RUNNING_POLL)
 347                        cancel_delayed_work(&mci->work);
 348        }
 349
 350        mutex_unlock(&mem_ctls_mutex);
 351
 352
 353        /* re-walk the list, and reset the poll delay */
 354        mutex_lock(&mem_ctls_mutex);
 355
 356        list_for_each(item, &mc_devices) {
 357                mci = list_entry(item, struct mem_ctl_info, link);
 358
 359                edac_mc_workq_setup(mci, (unsigned long) value);
 360        }
 361
 362        mutex_unlock(&mem_ctls_mutex);
 363}
 364
 365
 366
 367/* Return 0 on success, 1 on failure.
 368 * Before calling this function, caller must
 369 * assign a unique value to mci->mc_idx.
 370 *
 371 *      locking model:
 372 *
 373 *              called with the mem_ctls_mutex lock held
 374 */
 375static int add_mc_to_global_list(struct mem_ctl_info *mci)
 376{
 377        struct list_head *item, *insert_before;
 378        struct mem_ctl_info *p;
 379
 380        insert_before = &mc_devices;
 381
 382        p = find_mci_by_dev(mci->dev);
 383        if (unlikely(p != NULL))
 384                goto fail0;
 385
 386        list_for_each(item, &mc_devices) {
 387                p = list_entry(item, struct mem_ctl_info, link);
 388
 389                if (p->mc_idx >= mci->mc_idx) {
 390                        if (unlikely(p->mc_idx == mci->mc_idx))
 391                                goto fail1;
 392
 393                        insert_before = item;
 394                        break;
 395                }
 396        }
 397
 398        list_add_tail_rcu(&mci->link, insert_before);
 399        atomic_inc(&edac_handlers);
 400        return 0;
 401
 402fail0:
 403        edac_printk(KERN_WARNING, EDAC_MC,
 404                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 405                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 406        return 1;
 407
 408fail1:
 409        edac_printk(KERN_WARNING, EDAC_MC,
 410                "bug in low-level driver: attempt to assign\n"
 411                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 412        return 1;
 413}
 414
 415static void complete_mc_list_del(struct rcu_head *head)
 416{
 417        struct mem_ctl_info *mci;
 418
 419        mci = container_of(head, struct mem_ctl_info, rcu);
 420        INIT_LIST_HEAD(&mci->link);
 421        complete(&mci->complete);
 422}
 423
 424static void del_mc_from_global_list(struct mem_ctl_info *mci)
 425{
 426        atomic_dec(&edac_handlers);
 427        list_del_rcu(&mci->link);
 428        init_completion(&mci->complete);
 429        call_rcu(&mci->rcu, complete_mc_list_del);
 430        wait_for_completion(&mci->complete);
 431}
 432
 433/**
 434 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 435 *
 436 * If found, return a pointer to the structure.
 437 * Else return NULL.
 438 *
 439 * Caller must hold mem_ctls_mutex.
 440 */
 441struct mem_ctl_info *edac_mc_find(int idx)
 442{
 443        struct list_head *item;
 444        struct mem_ctl_info *mci;
 445
 446        list_for_each(item, &mc_devices) {
 447                mci = list_entry(item, struct mem_ctl_info, link);
 448
 449                if (mci->mc_idx >= idx) {
 450                        if (mci->mc_idx == idx)
 451                                return mci;
 452
 453                        break;
 454                }
 455        }
 456
 457        return NULL;
 458}
 459EXPORT_SYMBOL(edac_mc_find);
 460
 461/**
 462 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 463 *                 create sysfs entries associated with mci structure
 464 * @mci: pointer to the mci structure to be added to the list
 465 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
 466 *
 467 * Return:
 468 *      0       Success
 469 *      !0      Failure
 470 */
 471
 472/* FIXME - should a warning be printed if no error detection? correction? */
 473int edac_mc_add_mc(struct mem_ctl_info *mci)
 474{
 475        debugf0("%s()\n", __func__);
 476
 477#ifdef CONFIG_EDAC_DEBUG
 478        if (edac_debug_level >= 3)
 479                edac_mc_dump_mci(mci);
 480
 481        if (edac_debug_level >= 4) {
 482                int i;
 483
 484                for (i = 0; i < mci->nr_csrows; i++) {
 485                        int j;
 486
 487                        edac_mc_dump_csrow(&mci->csrows[i]);
 488                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
 489                                edac_mc_dump_channel(&mci->csrows[i].
 490                                                channels[j]);
 491                }
 492        }
 493#endif
 494        mutex_lock(&mem_ctls_mutex);
 495
 496        if (add_mc_to_global_list(mci))
 497                goto fail0;
 498
 499        /* set load time so that error rate can be tracked */
 500        mci->start_time = jiffies;
 501
 502        if (edac_create_sysfs_mci_device(mci)) {
 503                edac_mc_printk(mci, KERN_WARNING,
 504                        "failed to create sysfs device\n");
 505                goto fail1;
 506        }
 507
 508        /* If there IS a check routine, then we are running POLLED */
 509        if (mci->edac_check != NULL) {
 510                /* This instance is NOW RUNNING */
 511                mci->op_state = OP_RUNNING_POLL;
 512
 513                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 514        } else {
 515                mci->op_state = OP_RUNNING_INTERRUPT;
 516        }
 517
 518        /* Report action taken */
 519        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 520                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 521
 522        mutex_unlock(&mem_ctls_mutex);
 523        return 0;
 524
 525fail1:
 526        del_mc_from_global_list(mci);
 527
 528fail0:
 529        mutex_unlock(&mem_ctls_mutex);
 530        return 1;
 531}
 532EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 533
 534/**
 535 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 536 *                 remove mci structure from global list
 537 * @pdev: Pointer to 'struct device' representing mci structure to remove.
 538 *
 539 * Return pointer to removed mci structure, or NULL if device not found.
 540 */
 541struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 542{
 543        struct mem_ctl_info *mci;
 544
 545        debugf0("%s()\n", __func__);
 546
 547        mutex_lock(&mem_ctls_mutex);
 548
 549        /* find the requested mci struct in the global list */
 550        mci = find_mci_by_dev(dev);
 551        if (mci == NULL) {
 552                mutex_unlock(&mem_ctls_mutex);
 553                return NULL;
 554        }
 555
 556        /* marking MCI offline */
 557        mci->op_state = OP_OFFLINE;
 558
 559        del_mc_from_global_list(mci);
 560        mutex_unlock(&mem_ctls_mutex);
 561
 562        /* flush workq processes and remove sysfs */
 563        edac_mc_workq_teardown(mci);
 564        edac_remove_sysfs_mci_device(mci);
 565
 566        edac_printk(KERN_INFO, EDAC_MC,
 567                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 568                mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 569
 570        return mci;
 571}
 572EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 573
 574static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 575                                u32 size)
 576{
 577        struct page *pg;
 578        void *virt_addr;
 579        unsigned long flags = 0;
 580
 581        debugf3("%s()\n", __func__);
 582
 583        /* ECC error page was not in our memory. Ignore it. */
 584        if (!pfn_valid(page))
 585                return;
 586
 587        /* Find the actual page structure then map it and fix */
 588        pg = pfn_to_page(page);
 589
 590        if (PageHighMem(pg))
 591                local_irq_save(flags);
 592
 593        virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
 594
 595        /* Perform architecture specific atomic scrub operation */
 596        atomic_scrub(virt_addr + offset, size);
 597
 598        /* Unmap and complete */
 599        kunmap_atomic(virt_addr, KM_BOUNCE_READ);
 600
 601        if (PageHighMem(pg))
 602                local_irq_restore(flags);
 603}
 604
 605/* FIXME - should return -1 */
 606int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 607{
 608        struct csrow_info *csrows = mci->csrows;
 609        int row, i;
 610
 611        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 612        row = -1;
 613
 614        for (i = 0; i < mci->nr_csrows; i++) {
 615                struct csrow_info *csrow = &csrows[i];
 616
 617                if (csrow->nr_pages == 0)
 618                        continue;
 619
 620                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 621                        "mask(0x%lx)\n", mci->mc_idx, __func__,
 622                        csrow->first_page, page, csrow->last_page,
 623                        csrow->page_mask);
 624
 625                if ((page >= csrow->first_page) &&
 626                    (page <= csrow->last_page) &&
 627                    ((page & csrow->page_mask) ==
 628                     (csrow->first_page & csrow->page_mask))) {
 629                        row = i;
 630                        break;
 631                }
 632        }
 633
 634        if (row == -1)
 635                edac_mc_printk(mci, KERN_ERR,
 636                        "could not look up page error address %lx\n",
 637                        (unsigned long)page);
 638
 639        return row;
 640}
 641EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 642
 643/* FIXME - setable log (warning/emerg) levels */
 644/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
 645void edac_mc_handle_ce(struct mem_ctl_info *mci,
 646                unsigned long page_frame_number,
 647                unsigned long offset_in_page, unsigned long syndrome,
 648                int row, int channel, const char *msg)
 649{
 650        unsigned long remapped_page;
 651
 652        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 653
 654        /* FIXME - maybe make panic on INTERNAL ERROR an option */
 655        if (row >= mci->nr_csrows || row < 0) {
 656                /* something is wrong */
 657                edac_mc_printk(mci, KERN_ERR,
 658                        "INTERNAL ERROR: row out of range "
 659                        "(%d >= %d)\n", row, mci->nr_csrows);
 660                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 661                return;
 662        }
 663
 664        if (channel >= mci->csrows[row].nr_channels || channel < 0) {
 665                /* something is wrong */
 666                edac_mc_printk(mci, KERN_ERR,
 667                        "INTERNAL ERROR: channel out of range "
 668                        "(%d >= %d)\n", channel,
 669                        mci->csrows[row].nr_channels);
 670                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 671                return;
 672        }
 673
 674        if (edac_mc_get_log_ce())
 675                /* FIXME - put in DIMM location */
 676                edac_mc_printk(mci, KERN_WARNING,
 677                        "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
 678                        "0x%lx, row %d, channel %d, label \"%s\": %s\n",
 679                        page_frame_number, offset_in_page,
 680                        mci->csrows[row].grain, syndrome, row, channel,
 681                        mci->csrows[row].channels[channel].label, msg);
 682
 683        mci->ce_count++;
 684        mci->csrows[row].ce_count++;
 685        mci->csrows[row].channels[channel].ce_count++;
 686
 687        if (mci->scrub_mode & SCRUB_SW_SRC) {
 688                /*
 689                 * Some MC's can remap memory so that it is still available
 690                 * at a different address when PCI devices map into memory.
 691                 * MC's that can't do this lose the memory where PCI devices
 692                 * are mapped.  This mapping is MC dependant and so we call
 693                 * back into the MC driver for it to map the MC page to
 694                 * a physical (CPU) page which can then be mapped to a virtual
 695                 * page - which can then be scrubbed.
 696                 */
 697                remapped_page = mci->ctl_page_to_phys ?
 698                        mci->ctl_page_to_phys(mci, page_frame_number) :
 699                        page_frame_number;
 700
 701                edac_mc_scrub_block(remapped_page, offset_in_page,
 702                                mci->csrows[row].grain);
 703        }
 704}
 705EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
 706
 707void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
 708{
 709        if (edac_mc_get_log_ce())
 710                edac_mc_printk(mci, KERN_WARNING,
 711                        "CE - no information available: %s\n", msg);
 712
 713        mci->ce_noinfo_count++;
 714        mci->ce_count++;
 715}
 716EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
 717
 718void edac_mc_handle_ue(struct mem_ctl_info *mci,
 719                unsigned long page_frame_number,
 720                unsigned long offset_in_page, int row, const char *msg)
 721{
 722        int len = EDAC_MC_LABEL_LEN * 4;
 723        char labels[len + 1];
 724        char *pos = labels;
 725        int chan;
 726        int chars;
 727
 728        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 729
 730        /* FIXME - maybe make panic on INTERNAL ERROR an option */
 731        if (row >= mci->nr_csrows || row < 0) {
 732                /* something is wrong */
 733                edac_mc_printk(mci, KERN_ERR,
 734                        "INTERNAL ERROR: row out of range "
 735                        "(%d >= %d)\n", row, mci->nr_csrows);
 736                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 737                return;
 738        }
 739
 740        chars = snprintf(pos, len + 1, "%s",
 741                         mci->csrows[row].channels[0].label);
 742        len -= chars;
 743        pos += chars;
 744
 745        for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
 746                chan++) {
 747                chars = snprintf(pos, len + 1, ":%s",
 748                                 mci->csrows[row].channels[chan].label);
 749                len -= chars;
 750                pos += chars;
 751        }
 752
 753        if (edac_mc_get_log_ue())
 754                edac_mc_printk(mci, KERN_EMERG,
 755                        "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
 756                        "labels \"%s\": %s\n", page_frame_number,
 757                        offset_in_page, mci->csrows[row].grain, row,
 758                        labels, msg);
 759
 760        if (edac_mc_get_panic_on_ue())
 761                panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
 762                        "row %d, labels \"%s\": %s\n", mci->mc_idx,
 763                        page_frame_number, offset_in_page,
 764                        mci->csrows[row].grain, row, labels, msg);
 765
 766        mci->ue_count++;
 767        mci->csrows[row].ue_count++;
 768}
 769EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
 770
 771void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
 772{
 773        if (edac_mc_get_panic_on_ue())
 774                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
 775
 776        if (edac_mc_get_log_ue())
 777                edac_mc_printk(mci, KERN_WARNING,
 778                        "UE - no information available: %s\n", msg);
 779        mci->ue_noinfo_count++;
 780        mci->ue_count++;
 781}
 782EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
 783
 784/*************************************************************
 785 * On Fully Buffered DIMM modules, this help function is
 786 * called to process UE events
 787 */
 788void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 789                        unsigned int csrow,
 790                        unsigned int channela,
 791                        unsigned int channelb, char *msg)
 792{
 793        int len = EDAC_MC_LABEL_LEN * 4;
 794        char labels[len + 1];
 795        char *pos = labels;
 796        int chars;
 797
 798        if (csrow >= mci->nr_csrows) {
 799                /* something is wrong */
 800                edac_mc_printk(mci, KERN_ERR,
 801                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
 802                        csrow, mci->nr_csrows);
 803                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 804                return;
 805        }
 806
 807        if (channela >= mci->csrows[csrow].nr_channels) {
 808                /* something is wrong */
 809                edac_mc_printk(mci, KERN_ERR,
 810                        "INTERNAL ERROR: channel-a out of range "
 811                        "(%d >= %d)\n",
 812                        channela, mci->csrows[csrow].nr_channels);
 813                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 814                return;
 815        }
 816
 817        if (channelb >= mci->csrows[csrow].nr_channels) {
 818                /* something is wrong */
 819                edac_mc_printk(mci, KERN_ERR,
 820                        "INTERNAL ERROR: channel-b out of range "
 821                        "(%d >= %d)\n",
 822                        channelb, mci->csrows[csrow].nr_channels);
 823                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 824                return;
 825        }
 826
 827        mci->ue_count++;
 828        mci->csrows[csrow].ue_count++;
 829
 830        /* Generate the DIMM labels from the specified channels */
 831        chars = snprintf(pos, len + 1, "%s",
 832                         mci->csrows[csrow].channels[channela].label);
 833        len -= chars;
 834        pos += chars;
 835        chars = snprintf(pos, len + 1, "-%s",
 836                         mci->csrows[csrow].channels[channelb].label);
 837
 838        if (edac_mc_get_log_ue())
 839                edac_mc_printk(mci, KERN_EMERG,
 840                        "UE row %d, channel-a= %d channel-b= %d "
 841                        "labels \"%s\": %s\n", csrow, channela, channelb,
 842                        labels, msg);
 843
 844        if (edac_mc_get_panic_on_ue())
 845                panic("UE row %d, channel-a= %d channel-b= %d "
 846                        "labels \"%s\": %s\n", csrow, channela,
 847                        channelb, labels, msg);
 848}
 849EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
 850
 851/*************************************************************
 852 * On Fully Buffered DIMM modules, this help function is
 853 * called to process CE events
 854 */
 855void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 856                        unsigned int csrow, unsigned int channel, char *msg)
 857{
 858
 859        /* Ensure boundary values */
 860        if (csrow >= mci->nr_csrows) {
 861                /* something is wrong */
 862                edac_mc_printk(mci, KERN_ERR,
 863                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
 864                        csrow, mci->nr_csrows);
 865                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 866                return;
 867        }
 868        if (channel >= mci->csrows[csrow].nr_channels) {
 869                /* something is wrong */
 870                edac_mc_printk(mci, KERN_ERR,
 871                        "INTERNAL ERROR: channel out of range (%d >= %d)\n",
 872                        channel, mci->csrows[csrow].nr_channels);
 873                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 874                return;
 875        }
 876
 877        if (edac_mc_get_log_ce())
 878                /* FIXME - put in DIMM location */
 879                edac_mc_printk(mci, KERN_WARNING,
 880                        "CE row %d, channel %d, label \"%s\": %s\n",
 881                        csrow, channel,
 882                        mci->csrows[csrow].channels[channel].label, msg);
 883
 884        mci->ce_count++;
 885        mci->csrows[csrow].ce_count++;
 886        mci->csrows[csrow].channels[channel].ce_count++;
 887}
 888EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
 889