linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
{
        struct mem_ctl_info *mci = dimm->mci;
        int i, n, count = 0;
        char *p = buf;

        for (i = 0; i < mci->n_layers; i++) {
                n = snprintf(p, len, "%s %d ",
                              edac_layer_name[mci->layers[i].type],
                              dimm->location[i]);
                /*
                 * snprintf() returns the would-be length when the output
                 * is truncated; stop before advancing 'p' past 'buf'.
                 */
                if (n >= len)
                        break;
                p += n;
                len -= n;
                count += n;
        }

        return count;
}
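
/*
 * Example (illustrative): for a controller with two layers of type
 * EDAC_MC_LAYER_CHIP_SELECT and EDAC_MC_LAYER_CHANNEL, a DIMM at
 * location {2, 1} yields the string "csrow 2 channel 1 " in 'buf'
 * (see edac_layer_name[] below for the per-type layer names).
 */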

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
        edac_dbg(4, "    channel = %p\n", chan);
        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
        char location[80];

        edac_dimm_info_location(dimm, location, sizeof(location));

        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
                 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
                 number, location, dimm->csrow, dimm->cschannel);
        edac_dbg(4, "  dimm = %p\n", dimm);
        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
        edac_dbg(4, "  csrow = %p\n", csrow);
        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        edac_dbg(3, "\tmci = %p\n", mci);
        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
                 mci->nr_csrows, mci->csrows);
        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
                 mci->tot_dimms, mci->dimms);
        edac_dbg(3, "\tdev = %p\n", mci->pdev);
        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
                 mci->mod_name, mci->ctl_name);
        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
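
/*
 * Usage sketch: the array is indexed by enum mem_type, e.g.
 * edac_mem_types[MEM_DDR3] evaluates to "Unbuffered DDR3 RAM", which
 * drivers typically use when logging the detected DIMM type.
 */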

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating a struct along with its embedded structs, as
 * edac_mc_alloc() does below, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'ptr' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'ptr' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        /* align the offset value itself, not the address of the local 'p' */
        r = (unsigned long)ptr % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}
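
/*
 * Usage sketch for the single-shot allocation pattern this helper
 * supports (struct names here are illustrative). A first pass starts
 * from a NULL offset, so each call returns an aligned offset from
 * address zero and the final value of 'ptr' is the total size to
 * allocate. The chunk is then allocated once and every offset is
 * rebased onto it:
 *
 *      void *ptr = NULL, *base;
 *      struct foo *foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *      u32 *counters = edac_align_ptr(&ptr, sizeof(u32), n);
 *
 *      base = kzalloc((unsigned long)ptr, GFP_KERNEL);
 *      foo = (struct foo *)((char *)base + (unsigned long)foo);
 *      counters = (u32 *)((char *)base + (unsigned long)counters);
 *
 * edac_mc_alloc() below uses exactly this scheme for the mci, its
 * layers and the per-layer error counters.
 */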

static void _edac_mc_free(struct mem_ctl_info *mci)
{
        int i, chn, row;
        struct csrow_info *csr;
        const unsigned int tot_dimms = mci->tot_dimms;
        const unsigned int tot_channels = mci->num_cschannel;
        const unsigned int tot_csrows = mci->nr_csrows;

        if (mci->dimms) {
                for (i = 0; i < tot_dimms; i++)
                        kfree(mci->dimms[i]);
                kfree(mci->dimms);
        }
        if (mci->csrows) {
                for (row = 0; row < tot_csrows; row++) {
                        csr = mci->csrows[row];
                        if (csr) {
                                if (csr->channels) {
                                        for (chn = 0; chn < tot_channels; chn++)
                                                kfree(csr->channels[chn]);
                                        kfree(csr->channels);
                                }
                                kfree(csr);
                        }
                }
                kfree(mci->csrows);
        }
        kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires a change to
 * the drivers.
 *
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csr;
        struct rank_info *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, row, chn, n, len, off;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                edac_dbg(4, "errcount layer %d size %d\n", i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                 size,
                 tot_dimms,
                 per_rank ? "ranks" : "dimms",
                 tot_csrows * tot_channels);

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->mem_is_per_rank = per_rank;

        /*
         * Allocate and fill the csrow/channels structs
         */
        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
        if (!mci->csrows)
                goto error;
        for (row = 0; row < tot_csrows; row++) {
                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
                if (!csr)
                        goto error;
                mci->csrows[row] = csr;
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
                                        GFP_KERNEL);
                if (!csr->channels)
                        goto error;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
                        if (!chan)
                                goto error;
                        csr->channels[chn] = chan;
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Allocate and fill the dimm structs
         */
        mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
        if (!mci->dimms)
                goto error;

        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
        for (i = 0; i < tot_dimms; i++) {
                chan = mci->csrows[row]->channels[chn];
                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
                if (off < 0 || off >= tot_dimms) {
                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
                        goto error;
                }

                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
                if (!dimm)
                        goto error;
                mci->dimms[off] = dimm;
                dimm->mci = mci;

                /*
                 * Copy DIMM location and initialize it.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = snprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = snprintf(p, len, "%s#%u",
                                     edac_layer_name[layers[j].type],
                                     pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location */
                if (layers[0].is_virt_csrow) {
                        chn++;
                        if (chn == tot_channels) {
                                chn = 0;
                                row++;
                        }
                } else {
                        row++;
                        if (row == tot_csrows) {
                                row = 0;
                                chn++;
                        }
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;

        /* at this point the mci is fully allocated, but not yet registered
         * with sysfs; until edac_mc_add_mc() succeeds, release it with
         * edac_mc_free(), which falls back to _edac_mc_free() for an
         * unregistered instance
         */

        return mci;

error:
        _edac_mc_free(mci);

        return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
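
/*
 * Driver-side usage sketch (hypothetical controller with 4 chip-select
 * rows of 2 channels each, and a private struct my_pvt):
 *
 *      struct edac_mc_layer layers[2];
 *      struct mem_ctl_info *mci;
 *
 *      layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *      layers[0].size = 4;
 *      layers[0].is_virt_csrow = true;
 *      layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *      layers[1].size = 2;
 *      layers[1].is_virt_csrow = false;
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *
 * This reserves 8 struct dimm_info entries (4 csrows x 2 channels),
 * reachable both via mci->dimms[] and via
 * mci->csrows[]->channels[]->dimm.
 */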

/**
 * edac_mc_free: 'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        edac_dbg(1, "\n");

        /* If we're not yet registered with sysfs free only what was allocated
         * in edac_mc_alloc().
         */
        if (!device_is_registered(&mci->dev)) {
                _edac_mc_free(mci);
                return;
        }

        /* the mci instance is freed here, when the sysfs object is dropped */
        edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev: scan the list of controllers looking for the one
 *      that manages the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        edac_dbg(3, "\n");

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->pdev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        edac_dbg(0, "\n");

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                edac_dbg(0, "not canceled, flush the queue\n");

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->pdev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        struct csrow_info *csrow = mci->csrows[i];
                        u32 nr_pages = 0;
                        int j;

                        for (j = 0; j < csrow->nr_channels; j++)
                                nr_pages += csrow->channels[j]->dimm->nr_pages;
                        if (!nr_pages)
                                continue;
                        edac_mc_dump_csrow(csrow);
                        for (j = 0; j < csrow->nr_channels; j++)
                                if (csrow->channels[j]->dimm->nr_pages)
                                        edac_mc_dump_channel(csrow->channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        if (mci->dimms[i]->nr_pages)
                                edac_mc_dump_dimm(mci->dimms[i], i);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
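
/*
 * Typical probe-time sequence in a driver (sketch only; 'layers' is
 * filled as in the edac_mc_alloc() example above, my_check() is a
 * hypothetical polling callback, and error handling is abbreviated):
 *
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *      if (!mci)
 *              return -ENOMEM;
 *      mci->pdev = &pdev->dev;         (the key used by find_mci_by_dev())
 *      mci->mtype_cap = MEM_FLAG_DDR3;
 *      mci->edac_check = my_check;     (leave NULL if interrupt driven)
 *      ... fill the csrow/channel/dimm info ...
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 */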

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        edac_dbg(0, "\n");

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
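
/*
 * Matching remove-time sequence (sketch): look the mci up by the same
 * device that was registered at probe time, then release it:
 *
 *      struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *      if (mci)
 *              edac_mc_free(mci);
 */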

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        edac_dbg(3, "\n");

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info **csrows = mci->csrows;
        int row, i, j, n;

        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j]->dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
                         mci->mc_idx,
                         csrow->first_page, page, csrow->last_page,
                         csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
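
/*
 * Worked example (hypothetical numbers): a csrow with
 * first_page = 0x100000, last_page = 0x17ffff and page_mask = 0 matches
 * any page in [0x100000, 0x17ffff]. A nonzero page_mask additionally
 * requires the masked bits of 'page' to equal those of first_page,
 * which lets a csrow describe an interleaved, non-contiguous range.
 */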

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ce_mc += count;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}
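
/*
 * The loop above flattens the per-layer position into a row-major index.
 * Worked example: with two layers of sizes {4, 2} and pos = {2, 1}, the
 * layer-0 counter index is 2 and the layer-1 index is 2 * 2 + 1 = 5,
 * matching the per-layer counter blocks of 4 and 8 entries that
 * edac_mc_alloc() reserved.
 */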

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS],
                                    const u16 count)
{
        int i, index = 0;

        mci->ue_mc += count;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          long grain)
{
        unsigned long remapped_page;
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MC's that can't do this, lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s%son %s (%s %s - %s)\n",
                              msg, msg_aux, label, location, detail, other_detail);
                else
                        panic("UE %s%son %s (%s %s)\n",
                              msg, msg_aux, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

#define OTHER_LABEL " or "

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @error_count:        Number of errors of the same type
 * @page_frame_number:  mem page where the error occurred
 * @offset_in_page:     offset of the error inside the page
 * @syndrome:           ECC syndrome
 * @top_layer:          Memory layer[0] position
 * @mid_layer:          Memory layer[1] position
 * @low_layer:          Memory layer[2] position
 * @msg:                Message meaningful to the end users that
 *                      explains the event
 * @other_detail:       Technical details about the event that
 *                      may help hardware manufacturers and
 *                      EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int top_layer,
                          const int mid_layer,
                          const int low_layer,
                          const char *msg,
                          const char *other_detail)
{
        /* FIXME: too much for stack: move it to some pre-allocated area */
        char detail[80], location[80];
        char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
        int i;
        long grain;
        bool enable_per_layer_report = false;
        u8 grain_bits;

        edac_dbg(3, "MC%d\n", mci->mc_idx);

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {

                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning it, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
        grain = 0;
        p = label;
        *p = '\0';

        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = mci->dimms[i];

                if (top_layer >= 0 && top_layer != dimm->location[0])
                        continue;
                if (mid_layer >= 0 && mid_layer != dimm->location[1])
                        continue;
                if (low_layer >= 0 && low_layer != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > grain)
                        grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * seek for the affected DIMMs because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (enable_per_layer_report && dimm->nr_pages) {
                        if (p != label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
                                 mci->mem_is_per_rank ? "rank" : "dimm",
                                 dimm->csrow, dimm->cschannel);
                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!enable_per_layer_report) {
                strcpy(label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
                if (p == label)
                        strcpy(label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
                                if (chan >= 0)
                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
                        }
                } else
                        if (row >= 0)
                                mci->csrows[row]->ue_count += error_count;
        }

        /* Fill the RAM location data */
        p = location;

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
        if (p > location)
                *(p - 1) = '\0';

        /* Report the error via the trace interface */
        grain_bits = fls_long(grain) + 1;
        trace_mc_event(type, msg, label, error_count,
                       mci->mc_idx, top_layer, mid_layer, low_layer,
                       PAGES_TO_MiB(page_frame_number) | offset_in_page,
                       grain_bits, syndrome, other_detail);

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
                        page_frame_number, offset_in_page,
                        grain, syndrome);
                edac_ce_error(mci, error_count, pos, msg, location, label,
                              detail, other_detail, enable_per_layer_report,
                              page_frame_number, offset_in_page, grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld",
                        page_frame_number, offset_in_page, grain);

                edac_ue_error(mci, error_count, pos, msg, location, label,
                              detail, other_detail, enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
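
/*
 * Reporting sketch: a driver that decoded one corrected error at
 * csrow 2, channel 0 of a two-layer controller would call (the layer
 * positions 2, 0 and -1 below are top/mid/low; pass a negative value
 * for any layer the hardware cannot resolve):
 *
 *      edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *                           pfn, offset, syndrome,
 *                           2, 0, -1,
 *                           "single-bit error", "");
 *
 * When every layer is negative, only the controller-wide "noinfo"
 * counters are incremented and the error is logged against "any memory".
 */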