linux/drivers/edac/edac_mc.c
   1/*
   2 * edac_mc kernel module
   3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4 * This file may be distributed under the terms of the
   5 * GNU General Public License.
   6 *
   7 * Written by Thayne Harbaugh
   8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9 *      http://www.anime.net/~goemon/linux-ecc/
  10 *
  11 * Modified by Dave Peterson and Doug Thompson
  12 *
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/proc_fs.h>
  17#include <linux/kernel.h>
  18#include <linux/types.h>
  19#include <linux/smp.h>
  20#include <linux/init.h>
  21#include <linux/sysctl.h>
  22#include <linux/highmem.h>
  23#include <linux/timer.h>
  24#include <linux/slab.h>
  25#include <linux/jiffies.h>
  26#include <linux/spinlock.h>
  27#include <linux/list.h>
  28#include <linux/ctype.h>
  29#include <linux/edac.h>
  30#include <linux/bitops.h>
  31#include <asm/uaccess.h>
  32#include <asm/page.h>
  33#include <asm/edac.h>
  34#include "edac_core.h"
  35#include "edac_module.h"
  36
  37#define CREATE_TRACE_POINTS
  38#define TRACE_INCLUDE_PATH ../../include/ras
  39#include <ras/ras_event.h>
  40
  41/* lock to memory controller's control array */
  42static DEFINE_MUTEX(mem_ctls_mutex);
  43static LIST_HEAD(mc_devices);
  44
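/*
 * edac_dimm_info_location() - format a DIMM's per-layer location into @buf
 *
 * Walks the controller's layer hierarchy and emits one "<layer name> <index>"
 * pair per layer; e.g. a DIMM at csrow 2, channel 1 on a csrow/channel
 * controller is rendered as "csrow 2 channel 1 ".  Returns the number of
 * characters written.
 */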
  45unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
  46                                 unsigned len)
  47{
  48        struct mem_ctl_info *mci = dimm->mci;
  49        int i, n, count = 0;
  50        char *p = buf;
  51
  52        for (i = 0; i < mci->n_layers; i++) {
  53                n = snprintf(p, len, "%s %d ",
  54                              edac_layer_name[mci->layers[i].type],
  55                              dimm->location[i]);
  56                p += n;
  57                len -= n;
  58                count += n;
  59                if (!len)
  60                        break;
  61        }
  62
  63        return count;
  64}
  65
  66#ifdef CONFIG_EDAC_DEBUG
  67
  68static void edac_mc_dump_channel(struct rank_info *chan)
  69{
  70        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
  71        edac_dbg(4, "    channel = %p\n", chan);
  72        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
  73        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
  74}
  75
  76static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
  77{
  78        char location[80];
  79
  80        edac_dimm_info_location(dimm, location, sizeof(location));
  81
  82        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
  83                 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
  84                 number, location, dimm->csrow, dimm->cschannel);
  85        edac_dbg(4, "  dimm = %p\n", dimm);
  86        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
  87        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
  88        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
  89        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
  90}
  91
  92static void edac_mc_dump_csrow(struct csrow_info *csrow)
  93{
  94        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
  95        edac_dbg(4, "  csrow = %p\n", csrow);
  96        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
  97        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
  98        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
  99        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
 100        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
 101        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
 102}
 103
 104static void edac_mc_dump_mci(struct mem_ctl_info *mci)
 105{
 106        edac_dbg(3, "\tmci = %p\n", mci);
 107        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
 108        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
 109        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
 110        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
 111        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
 112                 mci->nr_csrows, mci->csrows);
 113        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
 114                 mci->tot_dimms, mci->dimms);
 115        edac_dbg(3, "\tdev = %p\n", mci->pdev);
 116        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
 117                 mci->mod_name, mci->ctl_name);
 118        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
 119}
 120
 121#endif                          /* CONFIG_EDAC_DEBUG */
 122
 123/*
 124 * keep those in sync with the enum mem_type
 125 */
 126const char *edac_mem_types[] = {
 127        "Empty csrow",
 128        "Reserved csrow type",
 129        "Unknown csrow type",
 130        "Fast page mode RAM",
 131        "Extended data out RAM",
 132        "Burst Extended data out RAM",
 133        "Single data rate SDRAM",
 134        "Registered single data rate SDRAM",
 135        "Double data rate SDRAM",
 136        "Registered Double data rate SDRAM",
 137        "Rambus DRAM",
 138        "Unbuffered DDR2 RAM",
 139        "Fully buffered DDR2",
 140        "Registered DDR2 RAM",
 141        "Rambus XDR",
 142        "Unbuffered DDR3 RAM",
 143        "Registered DDR3 RAM",
 144};
 145EXPORT_SYMBOL_GPL(edac_mem_types);
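/*
 * Illustrative sketch: edac_mem_types[] is indexed by enum mem_type from
 * <linux/edac.h>, e.g. to print a human-readable name for a DIMM's memory
 * technology.  The helper below is hypothetical, not part of this file.
 */
#if 0	/* example only */
static void example_print_mem_type(struct mem_ctl_info *mci,
				   struct dimm_info *dimm)
{
	/* dimm->mtype holds an enum mem_type value such as MEM_DDR3 */
	edac_mc_printk(mci, KERN_INFO, "DIMM '%s' is %s\n",
		       dimm->label, edac_mem_types[dimm->mtype]);
}
#endif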
 146
 147/**
 148 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 149 * @p:          pointer to a pointer with the memory offset to be used. At
 150 *              return, this will be incremented to point to the next offset
 151 * @size:       Size of the data structure to be reserved
 152 * @n_elems:    Number of elements that should be reserved
 153 *
 154 * If 'size' is a constant, the compiler will optimize this whole function
 155 * down to either a no-op or the addition of a constant to the value of '*p'.
 156 *
  157 * The 'p' pointer is needed to keep advancing further in memory to the
  158 * proper offsets when allocating a struct along with its embedded
  159 * structs, as edac_device_alloc_ctl_info() and edac_mc_alloc()
  160 * do, for example.
 161 *
 162 * At return, the pointer 'p' will be incremented to be used on a next call
 163 * to this function.
 164 */
 165void *edac_align_ptr(void **p, unsigned size, int n_elems)
 166{
 167        unsigned align, r;
 168        void *ptr = *p;
 169
 170        *p += size * n_elems;
 171
 172        /*
 173         * 'p' can possibly be an unaligned item X such that sizeof(X) is
 174         * 'size'.  Adjust 'p' so that its alignment is at least as
 175         * stringent as what the compiler would provide for X and return
 176         * the aligned result.
 177         * Here we assume that the alignment of a "long long" is the most
 178         * stringent alignment that the compiler will ever provide by default.
 179         * As far as I know, this is a reasonable assumption.
 180         */
 181        if (size > sizeof(long))
 182                align = sizeof(long long);
 183        else if (size > sizeof(int))
 184                align = sizeof(long);
 185        else if (size > sizeof(short))
 186                align = sizeof(int);
 187        else if (size > sizeof(char))
 188                align = sizeof(short);
 189        else
 190                return (char *)ptr;
 191
  192        r = (unsigned long)ptr % align;
 193
 194        if (r == 0)
 195                return (char *)ptr;
 196
 197        *p += align - r;
 198
 199        return (void *)(((unsigned long)ptr) + align - r);
 200}
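/*
 * Illustrative sketch of the single-shot allocation pattern this helper
 * supports (the same pattern edac_mc_alloc() uses below); the struct and
 * function names here are hypothetical.  Offsets are first accumulated
 * relative to a NULL base to learn the total size, then rebased onto the
 * real allocation.
 */
#if 0	/* example only */
struct example_ctl {
	u32 *counters;
	unsigned int n_counters;
};

static struct example_ctl *example_single_shot_alloc(unsigned int n_counters)
{
	void *ptr = NULL, *base;
	struct example_ctl *ctl;
	u32 *counters;
	size_t size;

	/* pass 1: accumulate aligned offsets relative to address 0 */
	ctl = edac_align_ptr(&ptr, sizeof(*ctl), 1);
	counters = edac_align_ptr(&ptr, sizeof(u32), n_counters);
	size = (unsigned long)ptr;

	base = kzalloc(size, GFP_KERNEL);
	if (!base)
		return NULL;

	/* pass 2: rebase the offsets inside the real allocation */
	ctl = (struct example_ctl *)((char *)base + (unsigned long)ctl);
	counters = (u32 *)((char *)base + (unsigned long)counters);

	ctl->counters = counters;
	ctl->n_counters = n_counters;

	return ctl;
}
#endif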
 201
 202static void _edac_mc_free(struct mem_ctl_info *mci)
 203{
 204        int i, chn, row;
 205        struct csrow_info *csr;
 206        const unsigned int tot_dimms = mci->tot_dimms;
 207        const unsigned int tot_channels = mci->num_cschannel;
 208        const unsigned int tot_csrows = mci->nr_csrows;
 209
 210        if (mci->dimms) {
 211                for (i = 0; i < tot_dimms; i++)
 212                        kfree(mci->dimms[i]);
 213                kfree(mci->dimms);
 214        }
 215        if (mci->csrows) {
 216                for (row = 0; row < tot_csrows; row++) {
 217                        csr = mci->csrows[row];
 218                        if (csr) {
 219                                if (csr->channels) {
 220                                        for (chn = 0; chn < tot_channels; chn++)
 221                                                kfree(csr->channels[chn]);
 222                                        kfree(csr->channels);
 223                                }
 224                                kfree(csr);
 225                        }
 226                }
 227                kfree(mci->csrows);
 228        }
 229        kfree(mci);
 230}
 231
 232/**
 233 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 234 * @mc_num:             Memory controller number
 235 * @n_layers:           Number of MC hierarchy layers
  236 * @layers:             Describes each layer as seen by the Memory Controller
  237 * @sz_pvt:             Size of private storage needed
 238 *
 239 *
 240 * Everything is kmalloc'ed as one big chunk - more efficient.
 241 * Only can be used if all structures have the same lifetime - otherwise
 242 * you have to allocate and initialize your own structures.
 243 *
 244 * Use edac_mc_free() to free mc structures allocated by this function.
 245 *
 246 * NOTE: drivers handle multi-rank memories in different ways: in some
 247 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 248 * others, a single multi-rank memory stick would be mapped into several
  249 * entries. Currently, this function will allocate multiple struct dimm_info
  250 * in such scenarios, as grouping the multiple ranks would require driver changes.
 251 *
 252 * Returns:
 253 *      On failure: NULL
 254 *      On success: struct mem_ctl_info pointer
 255 */
 256struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 257                                   unsigned n_layers,
 258                                   struct edac_mc_layer *layers,
 259                                   unsigned sz_pvt)
 260{
 261        struct mem_ctl_info *mci;
 262        struct edac_mc_layer *layer;
 263        struct csrow_info *csr;
 264        struct rank_info *chan;
 265        struct dimm_info *dimm;
 266        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 267        unsigned pos[EDAC_MAX_LAYERS];
 268        unsigned size, tot_dimms = 1, count = 1;
 269        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 270        void *pvt, *p, *ptr = NULL;
 271        int i, j, row, chn, n, len, off;
 272        bool per_rank = false;
 273
 274        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
 275        /*
  276         * Calculate the total number of dimms and csrows/cschannels while
 277         * in the old API emulation mode
 278         */
 279        for (i = 0; i < n_layers; i++) {
 280                tot_dimms *= layers[i].size;
 281                if (layers[i].is_virt_csrow)
 282                        tot_csrows *= layers[i].size;
 283                else
 284                        tot_channels *= layers[i].size;
 285
 286                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
 287                        per_rank = true;
 288        }
 289
 290        /* Figure out the offsets of the various items from the start of an mc
 291         * structure.  We want the alignment of each item to be at least as
 292         * stringent as what the compiler would provide if we could simply
 293         * hardcode everything into a single struct.
 294         */
 295        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 296        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
 297        for (i = 0; i < n_layers; i++) {
 298                count *= layers[i].size;
 299                edac_dbg(4, "errcount layer %d size %d\n", i, count);
 300                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 301                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 302                tot_errcount += 2 * count;
 303        }
 304
 305        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
 306        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 307        size = ((unsigned long)pvt) + sz_pvt;
 308
 309        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
 310                 size,
 311                 tot_dimms,
 312                 per_rank ? "ranks" : "dimms",
 313                 tot_csrows * tot_channels);
 314
 315        mci = kzalloc(size, GFP_KERNEL);
 316        if (mci == NULL)
 317                return NULL;
 318
 319        /* Adjust pointers so they point within the memory we just allocated
 320         * rather than an imaginary chunk of memory located at address 0.
 321         */
 322        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
 323        for (i = 0; i < n_layers; i++) {
 324                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
 325                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
 326        }
 327        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 328
 329        /* setup index and various internal pointers */
 330        mci->mc_idx = mc_num;
 331        mci->tot_dimms = tot_dimms;
 332        mci->pvt_info = pvt;
 333        mci->n_layers = n_layers;
 334        mci->layers = layer;
 335        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
 336        mci->nr_csrows = tot_csrows;
 337        mci->num_cschannel = tot_channels;
 338        mci->mem_is_per_rank = per_rank;
 339
 340        /*
  341         * Allocate and fill the csrow/channels structs
 342         */
  343        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
 344        if (!mci->csrows)
 345                goto error;
 346        for (row = 0; row < tot_csrows; row++) {
 347                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
 348                if (!csr)
 349                        goto error;
 350                mci->csrows[row] = csr;
 351                csr->csrow_idx = row;
 352                csr->mci = mci;
 353                csr->nr_channels = tot_channels;
  354                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
 355                                        GFP_KERNEL);
 356                if (!csr->channels)
 357                        goto error;
 358
 359                for (chn = 0; chn < tot_channels; chn++) {
 360                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
 361                        if (!chan)
 362                                goto error;
 363                        csr->channels[chn] = chan;
 364                        chan->chan_idx = chn;
 365                        chan->csrow = csr;
 366                }
 367        }
 368
 369        /*
 370         * Allocate and fill the dimm structs
 371         */
  372        mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
 373        if (!mci->dimms)
 374                goto error;
 375
 376        memset(&pos, 0, sizeof(pos));
 377        row = 0;
 378        chn = 0;
 379        for (i = 0; i < tot_dimms; i++) {
 380                chan = mci->csrows[row]->channels[chn];
 381                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
 382                if (off < 0 || off >= tot_dimms) {
 383                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
 384                        goto error;
 385                }
 386
 387                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
 388                if (!dimm)
 389                        goto error;
 390                mci->dimms[off] = dimm;
 391                dimm->mci = mci;
 392
 393                /*
 394                 * Copy DIMM location and initialize it.
 395                 */
 396                len = sizeof(dimm->label);
 397                p = dimm->label;
 398                n = snprintf(p, len, "mc#%u", mc_num);
 399                p += n;
 400                len -= n;
 401                for (j = 0; j < n_layers; j++) {
 402                        n = snprintf(p, len, "%s#%u",
 403                                     edac_layer_name[layers[j].type],
 404                                     pos[j]);
 405                        p += n;
 406                        len -= n;
 407                        dimm->location[j] = pos[j];
 408
 409                        if (len <= 0)
 410                                break;
 411                }
 412
 413                /* Link it to the csrows old API data */
 414                chan->dimm = dimm;
 415                dimm->csrow = row;
 416                dimm->cschannel = chn;
 417
 418                /* Increment csrow location */
 419                if (layers[0].is_virt_csrow) {
 420                        chn++;
 421                        if (chn == tot_channels) {
 422                                chn = 0;
 423                                row++;
 424                        }
 425                } else {
 426                        row++;
 427                        if (row == tot_csrows) {
 428                                row = 0;
 429                                chn++;
 430                        }
 431                }
 432
 433                /* Increment dimm location */
 434                for (j = n_layers - 1; j >= 0; j--) {
 435                        pos[j]++;
 436                        if (pos[j] < layers[j].size)
 437                                break;
 438                        pos[j] = 0;
 439                }
 440        }
 441
 442        mci->op_state = OP_ALLOC;
 443
  444        /* At this point, the root kobj is valid; in order to 'free' the
  445         * object, the function
  446         *      edac_mc_unregister_sysfs_main_kobj() must be called,
  447         * which performs the kobj unregistration; the actual free then
  448         * occurs during the kobject callback operation.
  449         */
 450
 451        return mci;
 452
 453error:
 454        _edac_mc_free(mci);
 455
 456        return NULL;
 457}
 458EXPORT_SYMBOL_GPL(edac_mc_alloc);
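/*
 * Illustrative sketch: how a driver might describe a controller with four
 * chip-select rows and two channels per row when calling edac_mc_alloc().
 * The sizes and the private struct are hypothetical.
 */
#if 0	/* example only */
struct example_pvt {
	void __iomem *regs;	/* hypothetical driver-private state */
};

static struct mem_ctl_info *example_alloc_mci(void)
{
	struct edac_mc_layer layers[2];

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;

	return edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
			     sizeof(struct example_pvt));
}
#endif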
 459
 460/**
 461 * edac_mc_free
 462 *      'Free' a previously allocated 'mci' structure
 463 * @mci: pointer to a struct mem_ctl_info structure
 464 */
 465void edac_mc_free(struct mem_ctl_info *mci)
 466{
 467        edac_dbg(1, "\n");
 468
 469        /* If we're not yet registered with sysfs free only what was allocated
 470         * in edac_mc_alloc().
 471         */
 472        if (!device_is_registered(&mci->dev)) {
 473                _edac_mc_free(mci);
 474                return;
 475        }
 476
 477        /* the mci instance is freed here, when the sysfs object is dropped */
 478        edac_unregister_sysfs(mci);
 479}
 480EXPORT_SYMBOL_GPL(edac_mc_free);
 481
 482
 483/**
 484 * find_mci_by_dev
 485 *
 486 *      scan list of controllers looking for the one that manages
 487 *      the 'dev' device
 488 * @dev: pointer to a struct device related with the MCI
 489 */
 490struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 491{
 492        struct mem_ctl_info *mci;
 493        struct list_head *item;
 494
 495        edac_dbg(3, "\n");
 496
 497        list_for_each(item, &mc_devices) {
 498                mci = list_entry(item, struct mem_ctl_info, link);
 499
 500                if (mci->pdev == dev)
 501                        return mci;
 502        }
 503
 504        return NULL;
 505}
 506EXPORT_SYMBOL_GPL(find_mci_by_dev);
 507
 508/*
 509 * handler for EDAC to check if NMI type handler has asserted interrupt
 510 */
 511static int edac_mc_assert_error_check_and_clear(void)
 512{
 513        int old_state;
 514
 515        if (edac_op_state == EDAC_OPSTATE_POLL)
 516                return 1;
 517
 518        old_state = edac_err_assert;
 519        edac_err_assert = 0;
 520
 521        return old_state;
 522}
 523
 524/*
 525 * edac_mc_workq_function
 526 *      performs the operation scheduled by a workq request
 527 */
 528static void edac_mc_workq_function(struct work_struct *work_req)
 529{
 530        struct delayed_work *d_work = to_delayed_work(work_req);
 531        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 532
 533        mutex_lock(&mem_ctls_mutex);
 534
  535        /* if this control struct has moved to the offline state, we are done */
 536        if (mci->op_state == OP_OFFLINE) {
 537                mutex_unlock(&mem_ctls_mutex);
 538                return;
 539        }
 540
 541        /* Only poll controllers that are running polled and have a check */
 542        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 543                mci->edac_check(mci);
 544
 545        mutex_unlock(&mem_ctls_mutex);
 546
 547        /* Reschedule */
 548        queue_delayed_work(edac_workqueue, &mci->work,
 549                        msecs_to_jiffies(edac_mc_get_poll_msec()));
 550}
 551
 552/*
 553 * edac_mc_workq_setup
 554 *      initialize a workq item for this mci
 555 *      passing in the new delay period in msec
 556 *
 557 *      locking model:
 558 *
 559 *              called with the mem_ctls_mutex held
 560 */
 561static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 562{
 563        edac_dbg(0, "\n");
 564
 565        /* if this instance is not in the POLL state, then simply return */
 566        if (mci->op_state != OP_RUNNING_POLL)
 567                return;
 568
 569        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 570        mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 571}
 572
 573/*
 574 * edac_mc_workq_teardown
 575 *      stop the workq processing on this mci
 576 *
 577 *      locking model:
 578 *
 579 *              called WITHOUT lock held
 580 */
 581static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 582{
 583        int status;
 584
 585        if (mci->op_state != OP_RUNNING_POLL)
 586                return;
 587
 588        status = cancel_delayed_work(&mci->work);
 589        if (status == 0) {
 590                edac_dbg(0, "not canceled, flush the queue\n");
 591
 592                /* workq instance might be running, wait for it */
 593                flush_workqueue(edac_workqueue);
 594        }
 595}
 596
 597/*
  598 * edac_mc_reset_delay_period(int value)
 599 *
 600 *      user space has updated our poll period value, need to
 601 *      reset our workq delays
 602 */
 603void edac_mc_reset_delay_period(int value)
 604{
 605        struct mem_ctl_info *mci;
 606        struct list_head *item;
 607
 608        mutex_lock(&mem_ctls_mutex);
 609
 610        list_for_each(item, &mc_devices) {
 611                mci = list_entry(item, struct mem_ctl_info, link);
 612
 613                edac_mc_workq_setup(mci, (unsigned long) value);
 614        }
 615
 616        mutex_unlock(&mem_ctls_mutex);
 617}
 618
 619
 620
 621/* Return 0 on success, 1 on failure.
 622 * Before calling this function, caller must
 623 * assign a unique value to mci->mc_idx.
 624 *
 625 *      locking model:
 626 *
 627 *              called with the mem_ctls_mutex lock held
 628 */
 629static int add_mc_to_global_list(struct mem_ctl_info *mci)
 630{
 631        struct list_head *item, *insert_before;
 632        struct mem_ctl_info *p;
 633
 634        insert_before = &mc_devices;
 635
 636        p = find_mci_by_dev(mci->pdev);
 637        if (unlikely(p != NULL))
 638                goto fail0;
 639
 640        list_for_each(item, &mc_devices) {
 641                p = list_entry(item, struct mem_ctl_info, link);
 642
 643                if (p->mc_idx >= mci->mc_idx) {
 644                        if (unlikely(p->mc_idx == mci->mc_idx))
 645                                goto fail1;
 646
 647                        insert_before = item;
 648                        break;
 649                }
 650        }
 651
 652        list_add_tail_rcu(&mci->link, insert_before);
 653        atomic_inc(&edac_handlers);
 654        return 0;
 655
 656fail0:
 657        edac_printk(KERN_WARNING, EDAC_MC,
 658                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
 659                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 660        return 1;
 661
 662fail1:
 663        edac_printk(KERN_WARNING, EDAC_MC,
 664                "bug in low-level driver: attempt to assign\n"
 665                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 666        return 1;
 667}
 668
 669static void del_mc_from_global_list(struct mem_ctl_info *mci)
 670{
 671        atomic_dec(&edac_handlers);
 672        list_del_rcu(&mci->link);
 673
 674        /* these are for safe removal of devices from global list while
 675         * NMI handlers may be traversing list
 676         */
 677        synchronize_rcu();
 678        INIT_LIST_HEAD(&mci->link);
 679}
 680
 681/**
 682 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 683 *
 684 * If found, return a pointer to the structure.
 685 * Else return NULL.
 686 *
 687 * Caller must hold mem_ctls_mutex.
 688 */
 689struct mem_ctl_info *edac_mc_find(int idx)
 690{
 691        struct list_head *item;
 692        struct mem_ctl_info *mci;
 693
 694        list_for_each(item, &mc_devices) {
 695                mci = list_entry(item, struct mem_ctl_info, link);
 696
 697                if (mci->mc_idx >= idx) {
 698                        if (mci->mc_idx == idx)
 699                                return mci;
 700
 701                        break;
 702                }
 703        }
 704
 705        return NULL;
 706}
 707EXPORT_SYMBOL(edac_mc_find);
 708
 709/**
 710 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 711 *                 create sysfs entries associated with mci structure
 712 * @mci: pointer to the mci structure to be added to the list
 713 *
 714 * Return:
 715 *      0       Success
 716 *      !0      Failure
 717 */
 718
 719/* FIXME - should a warning be printed if no error detection? correction? */
 720int edac_mc_add_mc(struct mem_ctl_info *mci)
 721{
 722        edac_dbg(0, "\n");
 723
 724#ifdef CONFIG_EDAC_DEBUG
 725        if (edac_debug_level >= 3)
 726                edac_mc_dump_mci(mci);
 727
 728        if (edac_debug_level >= 4) {
 729                int i;
 730
 731                for (i = 0; i < mci->nr_csrows; i++) {
 732                        struct csrow_info *csrow = mci->csrows[i];
 733                        u32 nr_pages = 0;
 734                        int j;
 735
 736                        for (j = 0; j < csrow->nr_channels; j++)
 737                                nr_pages += csrow->channels[j]->dimm->nr_pages;
 738                        if (!nr_pages)
 739                                continue;
 740                        edac_mc_dump_csrow(csrow);
 741                        for (j = 0; j < csrow->nr_channels; j++)
 742                                if (csrow->channels[j]->dimm->nr_pages)
 743                                        edac_mc_dump_channel(csrow->channels[j]);
 744                }
 745                for (i = 0; i < mci->tot_dimms; i++)
 746                        if (mci->dimms[i]->nr_pages)
 747                                edac_mc_dump_dimm(mci->dimms[i], i);
 748        }
 749#endif
 750        mutex_lock(&mem_ctls_mutex);
 751
 752        if (add_mc_to_global_list(mci))
 753                goto fail0;
 754
 755        /* set load time so that error rate can be tracked */
 756        mci->start_time = jiffies;
 757
 758        if (edac_create_sysfs_mci_device(mci)) {
 759                edac_mc_printk(mci, KERN_WARNING,
 760                        "failed to create sysfs device\n");
 761                goto fail1;
 762        }
 763
 764        /* If there IS a check routine, then we are running POLLED */
 765        if (mci->edac_check != NULL) {
 766                /* This instance is NOW RUNNING */
 767                mci->op_state = OP_RUNNING_POLL;
 768
 769                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 770        } else {
 771                mci->op_state = OP_RUNNING_INTERRUPT;
 772        }
 773
 774        /* Report action taken */
 775        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 776                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 777
 778        mutex_unlock(&mem_ctls_mutex);
 779        return 0;
 780
 781fail1:
 782        del_mc_from_global_list(mci);
 783
 784fail0:
 785        mutex_unlock(&mem_ctls_mutex);
 786        return 1;
 787}
 788EXPORT_SYMBOL_GPL(edac_mc_add_mc);
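/*
 * Illustrative sketch of the registration order: allocate the mci, fill in
 * the fields the core and sysfs need, then hand it over with
 * edac_mc_add_mc().  The device, capability flags and names below are
 * hypothetical; example_alloc_mci() refers to the edac_mc_alloc() sketch
 * above.
 */
#if 0	/* example only */
static int example_probe(struct device *dev)
{
	struct mem_ctl_info *mci;

	mci = example_alloc_mci();
	if (!mci)
		return -ENOMEM;

	mci->pdev = dev;
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = "example_edac";
	mci->ctl_name = "example_ctl";
	mci->dev_name = dev_name(dev);
	mci->edac_check = NULL;	/* no check routine: runs interrupt driven */

	if (edac_mc_add_mc(mci)) {
		edac_mc_free(mci);
		return -ENODEV;
	}

	return 0;
}
#endif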
 789
 790/**
 791 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 792 *                 remove mci structure from global list
  793 * @dev: Pointer to the 'struct device' representing the mci structure to remove.
 794 *
 795 * Return pointer to removed mci structure, or NULL if device not found.
 796 */
 797struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 798{
 799        struct mem_ctl_info *mci;
 800
 801        edac_dbg(0, "\n");
 802
 803        mutex_lock(&mem_ctls_mutex);
 804
 805        /* find the requested mci struct in the global list */
 806        mci = find_mci_by_dev(dev);
 807        if (mci == NULL) {
 808                mutex_unlock(&mem_ctls_mutex);
 809                return NULL;
 810        }
 811
 812        del_mc_from_global_list(mci);
 813        mutex_unlock(&mem_ctls_mutex);
 814
 815        /* flush workq processes */
 816        edac_mc_workq_teardown(mci);
 817
 818        /* marking MCI offline */
 819        mci->op_state = OP_OFFLINE;
 820
 821        /* remove from sysfs */
 822        edac_remove_sysfs_mci_device(mci);
 823
 824        edac_printk(KERN_INFO, EDAC_MC,
 825                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 826                mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 827
 828        return mci;
 829}
 830EXPORT_SYMBOL_GPL(edac_mc_del_mc);
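/*
 * Illustrative sketch of the matching teardown order: edac_mc_del_mc()
 * unhooks the controller (sysfs, polling, global list) and returns the mci,
 * which the driver then releases with edac_mc_free().  The function name is
 * hypothetical.
 */
#if 0	/* example only */
static void example_remove(struct device *dev)
{
	struct mem_ctl_info *mci;

	mci = edac_mc_del_mc(dev);
	if (!mci)
		return;

	/* driver-private teardown would go here */

	edac_mc_free(mci);
}
#endif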
 831
 832static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 833                                u32 size)
 834{
 835        struct page *pg;
 836        void *virt_addr;
 837        unsigned long flags = 0;
 838
 839        edac_dbg(3, "\n");
 840
 841        /* ECC error page was not in our memory. Ignore it. */
 842        if (!pfn_valid(page))
 843                return;
 844
 845        /* Find the actual page structure then map it and fix */
 846        pg = pfn_to_page(page);
 847
 848        if (PageHighMem(pg))
 849                local_irq_save(flags);
 850
 851        virt_addr = kmap_atomic(pg);
 852
 853        /* Perform architecture specific atomic scrub operation */
 854        atomic_scrub(virt_addr + offset, size);
 855
 856        /* Unmap and complete */
 857        kunmap_atomic(virt_addr);
 858
 859        if (PageHighMem(pg))
 860                local_irq_restore(flags);
 861}
 862
 863/* FIXME - should return -1 */
 864int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 865{
 866        struct csrow_info **csrows = mci->csrows;
 867        int row, i, j, n;
 868
 869        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
 870        row = -1;
 871
 872        for (i = 0; i < mci->nr_csrows; i++) {
 873                struct csrow_info *csrow = csrows[i];
 874                n = 0;
 875                for (j = 0; j < csrow->nr_channels; j++) {
 876                        struct dimm_info *dimm = csrow->channels[j]->dimm;
 877                        n += dimm->nr_pages;
 878                }
 879                if (n == 0)
 880                        continue;
 881
 882                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
 883                         mci->mc_idx,
 884                         csrow->first_page, page, csrow->last_page,
 885                         csrow->page_mask);
 886
 887                if ((page >= csrow->first_page) &&
 888                    (page <= csrow->last_page) &&
 889                    ((page & csrow->page_mask) ==
 890                     (csrow->first_page & csrow->page_mask))) {
 891                        row = i;
 892                        break;
 893                }
 894        }
 895
 896        if (row == -1)
 897                edac_mc_printk(mci, KERN_ERR,
 898                        "could not look up page error address %lx\n",
 899                        (unsigned long)page);
 900
 901        return row;
 902}
 903EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 904
 905const char *edac_layer_name[] = {
 906        [EDAC_MC_LAYER_BRANCH] = "branch",
 907        [EDAC_MC_LAYER_CHANNEL] = "channel",
 908        [EDAC_MC_LAYER_SLOT] = "slot",
 909        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
 910};
 911EXPORT_SYMBOL_GPL(edac_layer_name);
 912
 913static void edac_inc_ce_error(struct mem_ctl_info *mci,
 914                              bool enable_per_layer_report,
 915                              const int pos[EDAC_MAX_LAYERS],
 916                              const u16 count)
 917{
 918        int i, index = 0;
 919
 920        mci->ce_mc += count;
 921
 922        if (!enable_per_layer_report) {
 923                mci->ce_noinfo_count += count;
 924                return;
 925        }
 926
 927        for (i = 0; i < mci->n_layers; i++) {
 928                if (pos[i] < 0)
 929                        break;
 930                index += pos[i];
 931                mci->ce_per_layer[i][index] += count;
 932
 933                if (i < mci->n_layers - 1)
 934                        index *= mci->layers[i + 1].size;
 935        }
 936}
 937
 938static void edac_inc_ue_error(struct mem_ctl_info *mci,
 939                                    bool enable_per_layer_report,
 940                                    const int pos[EDAC_MAX_LAYERS],
 941                                    const u16 count)
 942{
 943        int i, index = 0;
 944
 945        mci->ue_mc += count;
 946
 947        if (!enable_per_layer_report) {
  948                mci->ue_noinfo_count += count;
 949                return;
 950        }
 951
 952        for (i = 0; i < mci->n_layers; i++) {
 953                if (pos[i] < 0)
 954                        break;
 955                index += pos[i];
 956                mci->ue_per_layer[i][index] += count;
 957
 958                if (i < mci->n_layers - 1)
 959                        index *= mci->layers[i + 1].size;
 960        }
 961}
 962
 963static void edac_ce_error(struct mem_ctl_info *mci,
 964                          const u16 error_count,
 965                          const int pos[EDAC_MAX_LAYERS],
 966                          const char *msg,
 967                          const char *location,
 968                          const char *label,
 969                          const char *detail,
 970                          const char *other_detail,
 971                          const bool enable_per_layer_report,
 972                          const unsigned long page_frame_number,
 973                          const unsigned long offset_in_page,
 974                          long grain)
 975{
 976        unsigned long remapped_page;
 977
 978        if (edac_mc_get_log_ce()) {
 979                if (other_detail && *other_detail)
 980                        edac_mc_printk(mci, KERN_WARNING,
 981                                       "%d CE %s on %s (%s %s - %s)\n",
 982                                       error_count,
 983                                       msg, label, location,
 984                                       detail, other_detail);
 985                else
 986                        edac_mc_printk(mci, KERN_WARNING,
 987                                       "%d CE %s on %s (%s %s)\n",
 988                                       error_count,
 989                                       msg, label, location,
 990                                       detail);
 991        }
 992        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
 993
 994        if (mci->scrub_mode & SCRUB_SW_SRC) {
 995                /*
 996                        * Some memory controllers (called MCs below) can remap
 997                        * memory so that it is still available at a different
 998                        * address when PCI devices map into memory.
  999                        * MCs that can't do this lose the memory where PCI
1000                        * devices are mapped. This mapping is MC-dependent
1001                        * and so we call back into the MC driver for it to
1002                        * map the MC page to a physical (CPU) page which can
1003                        * then be mapped to a virtual page - which can then
1004                        * be scrubbed.
1005                        */
1006                remapped_page = mci->ctl_page_to_phys ?
1007                        mci->ctl_page_to_phys(mci, page_frame_number) :
1008                        page_frame_number;
1009
1010                edac_mc_scrub_block(remapped_page,
1011                                        offset_in_page, grain);
1012        }
1013}
1014
1015static void edac_ue_error(struct mem_ctl_info *mci,
1016                          const u16 error_count,
1017                          const int pos[EDAC_MAX_LAYERS],
1018                          const char *msg,
1019                          const char *location,
1020                          const char *label,
1021                          const char *detail,
1022                          const char *other_detail,
1023                          const bool enable_per_layer_report)
1024{
1025        if (edac_mc_get_log_ue()) {
1026                if (other_detail && *other_detail)
1027                        edac_mc_printk(mci, KERN_WARNING,
1028                                       "%d UE %s on %s (%s %s - %s)\n",
1029                                       error_count,
1030                                       msg, label, location, detail,
1031                                       other_detail);
1032                else
1033                        edac_mc_printk(mci, KERN_WARNING,
1034                                       "%d UE %s on %s (%s %s)\n",
1035                                       error_count,
1036                                       msg, label, location, detail);
1037        }
1038
1039        if (edac_mc_get_panic_on_ue()) {
1040                if (other_detail && *other_detail)
1041                        panic("UE %s on %s (%s%s - %s)\n",
1042                              msg, label, location, detail, other_detail);
1043                else
1044                        panic("UE %s on %s (%s%s)\n",
1045                              msg, label, location, detail);
1046        }
1047
1048        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1049}
1050
1051#define OTHER_LABEL " or "
1052
1053/**
1054 * edac_mc_handle_error - reports a memory event to userspace
1055 *
1056 * @type:               severity of the error (CE/UE/Fatal)
1057 * @mci:                a struct mem_ctl_info pointer
1058 * @error_count:        Number of errors of the same type
1059 * @page_frame_number:  mem page where the error occurred
1060 * @offset_in_page:     offset of the error inside the page
1061 * @syndrome:           ECC syndrome
1062 * @top_layer:          Memory layer[0] position
1063 * @mid_layer:          Memory layer[1] position
1064 * @low_layer:          Memory layer[2] position
1065 * @msg:                Message meaningful to the end users that
1066 *                      explains the event
1067 * @other_detail:       Technical details about the event that
1068 *                      may help hardware manufacturers and
1069 *                      EDAC developers to analyse the event
1070 */
1071void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1072                          struct mem_ctl_info *mci,
1073                          const u16 error_count,
1074                          const unsigned long page_frame_number,
1075                          const unsigned long offset_in_page,
1076                          const unsigned long syndrome,
1077                          const int top_layer,
1078                          const int mid_layer,
1079                          const int low_layer,
1080                          const char *msg,
1081                          const char *other_detail)
1082{
 1083        /* FIXME: too much for stack: move it to some pre-allocated area */
1084        char detail[80], location[80];
1085        char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1086        char *p;
1087        int row = -1, chan = -1;
1088        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1089        int i;
1090        long grain;
1091        bool enable_per_layer_report = false;
1092        u8 grain_bits;
1093
1094        edac_dbg(3, "MC%d\n", mci->mc_idx);
1095
1096        /*
1097         * Check if the event report is consistent and if the memory
1098         * location is known. If it is known, enable_per_layer_report will be
1099         * true, the DIMM(s) label info will be filled and the per-layer
1100         * error counters will be incremented.
1101         */
1102        for (i = 0; i < mci->n_layers; i++) {
1103                if (pos[i] >= (int)mci->layers[i].size) {
1104                        if (type == HW_EVENT_ERR_CORRECTED)
1105                                p = "CE";
1106                        else
1107                                p = "UE";
1108
1109                        edac_mc_printk(mci, KERN_ERR,
1110                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1111                                       edac_layer_name[mci->layers[i].type],
1112                                       pos[i], mci->layers[i].size);
1113                        /*
1114                         * Instead of just returning it, let's use what's
1115                         * known about the error. The increment routines and
1116                         * the DIMM filter logic will do the right thing by
 1117                         * pointing to the likely damaged DIMMs.
1118                         */
1119                        pos[i] = -1;
1120                }
1121                if (pos[i] >= 0)
1122                        enable_per_layer_report = true;
1123        }
1124
1125        /*
1126         * Get the dimm label/grain that applies to the match criteria.
1127         * As the error algorithm may not be able to point to just one memory
1128         * stick, the logic here will get all possible labels that could
 1129         * potentially be affected by the error.
1130         * On FB-DIMM memory controllers, for uncorrected errors, it is common
1131         * to have only the MC channel and the MC dimm (also called "branch")
1132         * but the channel is not known, as the memory is arranged in pairs,
1133         * where each memory belongs to a separate channel within the same
1134         * branch.
1135         */
1136        grain = 0;
1137        p = label;
1138        *p = '\0';
1139        for (i = 0; i < mci->tot_dimms; i++) {
1140                struct dimm_info *dimm = mci->dimms[i];
1141
1142                if (top_layer >= 0 && top_layer != dimm->location[0])
1143                        continue;
1144                if (mid_layer >= 0 && mid_layer != dimm->location[1])
1145                        continue;
1146                if (low_layer >= 0 && low_layer != dimm->location[2])
1147                        continue;
1148
1149                /* get the max grain, over the error match range */
1150                if (dimm->grain > grain)
1151                        grain = dimm->grain;
1152
1153                /*
1154                 * If the error is memory-controller wide, there's no need to
1155                 * seek for the affected DIMMs because the whole
1156                 * channel/memory controller/...  may be affected.
1157                 * Also, don't show errors for empty DIMM slots.
1158                 */
1159                if (enable_per_layer_report && dimm->nr_pages) {
1160                        if (p != label) {
1161                                strcpy(p, OTHER_LABEL);
1162                                p += strlen(OTHER_LABEL);
1163                        }
1164                        strcpy(p, dimm->label);
1165                        p += strlen(p);
1166                        *p = '\0';
1167
1168                        /*
1169                         * get csrow/channel of the DIMM, in order to allow
1170                         * incrementing the compat API counters
1171                         */
1172                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
1173                                 mci->mem_is_per_rank ? "rank" : "dimm",
1174                                 dimm->csrow, dimm->cschannel);
1175                        if (row == -1)
1176                                row = dimm->csrow;
1177                        else if (row >= 0 && row != dimm->csrow)
1178                                row = -2;
1179
1180                        if (chan == -1)
1181                                chan = dimm->cschannel;
1182                        else if (chan >= 0 && chan != dimm->cschannel)
1183                                chan = -2;
1184                }
1185        }
1186
1187        if (!enable_per_layer_report) {
1188                strcpy(label, "any memory");
1189        } else {
1190                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1191                if (p == label)
1192                        strcpy(label, "unknown memory");
1193                if (type == HW_EVENT_ERR_CORRECTED) {
1194                        if (row >= 0) {
1195                                mci->csrows[row]->ce_count += error_count;
1196                                if (chan >= 0)
1197                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
1198                        }
1199                } else
1200                        if (row >= 0)
1201                                mci->csrows[row]->ue_count += error_count;
1202        }
1203
1204        /* Fill the RAM location data */
1205        p = location;
1206        for (i = 0; i < mci->n_layers; i++) {
1207                if (pos[i] < 0)
1208                        continue;
1209
1210                p += sprintf(p, "%s:%d ",
1211                             edac_layer_name[mci->layers[i].type],
1212                             pos[i]);
1213        }
1214        if (p > location)
1215                *(p - 1) = '\0';
1216
1217        /* Report the error via the trace interface */
1218
1219        grain_bits = fls_long(grain) + 1;
1220        trace_mc_event(type, msg, label, error_count,
1221                       mci->mc_idx, top_layer, mid_layer, low_layer,
1222                       PAGES_TO_MiB(page_frame_number) | offset_in_page,
1223                       grain_bits, syndrome, other_detail);
1224
1225        /* Memory type dependent details about the error */
1226        if (type == HW_EVENT_ERR_CORRECTED) {
1227                snprintf(detail, sizeof(detail),
1228                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1229                        page_frame_number, offset_in_page,
1230                        grain, syndrome);
1231                edac_ce_error(mci, error_count, pos, msg, location, label,
1232                              detail, other_detail, enable_per_layer_report,
1233                              page_frame_number, offset_in_page, grain);
1234        } else {
1235                snprintf(detail, sizeof(detail),
1236                        "page:0x%lx offset:0x%lx grain:%ld",
1237                        page_frame_number, offset_in_page, grain);
1238
1239                edac_ue_error(mci, error_count, pos, msg, location, label,
1240                              detail, other_detail, enable_per_layer_report);
1241        }
1242}
1243EXPORT_SYMBOL_GPL(edac_mc_handle_error);
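/*
 * Illustrative sketch: how a driver's error handler might report a single
 * corrected error it has already decoded from its hardware registers.  All
 * decoded values below are hypothetical; unknown layer positions are passed
 * as -1.
 */
#if 0	/* example only */
static void example_report_ce(struct mem_ctl_info *mci)
{
	unsigned long pfn = 0x12345;		/* decoded page frame number */
	unsigned long offset = 0x40;		/* offset within the page */
	unsigned long syndrome = 0x5a;		/* ECC syndrome, if available */
	int csrow = 1, channel = 0;		/* decoded location */

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     pfn, offset, syndrome,
			     csrow, channel, -1,
			     "example single-bit error", "");
}
#endif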
1244