linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers,
 * e.g. apei/ghes and i7core_edac, being used at the same time.
 */
static const void *edac_mc_owner;
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
{
        struct mem_ctl_info *mci = dimm->mci;
        int i, n, count = 0;
        char *p = buf;

        for (i = 0; i < mci->n_layers; i++) {
                n = snprintf(p, len, "%s %d ",
                              edac_layer_name[mci->layers[i].type],
                              dimm->location[i]);
                p += n;
                len -= n;
                count += n;
                if (!len)
                        break;
        }

        return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
        edac_dbg(4, "    channel = %p\n", chan);
        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
        char location[80];

        edac_dimm_info_location(dimm, location, sizeof(location));

        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
                 dimm->mci->csbased ? "rank" : "dimm",
                 number, location, dimm->csrow, dimm->cschannel);
        edac_dbg(4, "  dimm = %p\n", dimm);
        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
        edac_dbg(4, "  csrow = %p\n", csrow);
        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        edac_dbg(3, "\tmci = %p\n", mci);
        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
                 mci->nr_csrows, mci->csrows);
        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
                 mci->tot_dimms, mci->dimms);
        edac_dbg(3, "\tdev = %p\n", mci->pdev);
        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
                 mci->mod_name, mci->ctl_name);
        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

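/*
 * A small usage sketch (not compiled; the caller is hypothetical):
 * drivers index this table with a mem_type value when logging the
 * detected DIMM technology.
 */
#if 0   /* illustrative only */
static void example_log_mem_type(enum mem_type mtype)
{
        edac_printk(KERN_INFO, EDAC_MC, "DIMM type: %s\n",
                    edac_mem_types[mtype]);
}
#endif
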
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating a struct along with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'ptr' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'ptr' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        r = (unsigned long)ptr % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}

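/*
 * A minimal sketch (not compiled; struct and function names are
 * hypothetical) of the single-shot allocation pattern that
 * edac_align_ptr() supports: a first pass accumulates aligned offsets
 * against a NULL base to learn the total size, and a second pass
 * rebases each offset onto one kzalloc()ed chunk.
 */
#if 0   /* illustrative only */
struct example_ctl {
        u32 *counters;
};

static struct example_ctl *example_single_shot_alloc(unsigned n_counters)
{
        void *ptr = NULL;
        struct example_ctl *ctl;
        u32 *counters;
        unsigned size;
        char *base;

        /* Pass 1: compute offsets relative to address 0 */
        ctl = edac_align_ptr(&ptr, sizeof(*ctl), 1);
        counters = edac_align_ptr(&ptr, sizeof(*counters), n_counters);
        size = (unsigned long)ptr;

        base = kzalloc(size, GFP_KERNEL);
        if (!base)
                return NULL;

        /* Pass 2: turn the offsets into real pointers */
        ctl = (struct example_ctl *)(base + (unsigned long)ctl);
        ctl->counters = (u32 *)(base + (unsigned long)counters);

        return ctl;
}
#endif
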
static void _edac_mc_free(struct mem_ctl_info *mci)
{
        int i, chn, row;
        struct csrow_info *csr;
        const unsigned int tot_dimms = mci->tot_dimms;
        const unsigned int tot_channels = mci->num_cschannel;
        const unsigned int tot_csrows = mci->nr_csrows;

        if (mci->dimms) {
                for (i = 0; i < tot_dimms; i++)
                        kfree(mci->dimms[i]);
                kfree(mci->dimms);
        }
        if (mci->csrows) {
                for (row = 0; row < tot_csrows; row++) {
                        csr = mci->csrows[row];
                        if (csr) {
                                if (csr->channels) {
                                        for (chn = 0; chn < tot_channels; chn++)
                                                kfree(csr->channels[chn]);
                                        kfree(csr->channels);
                                }
                                kfree(csr);
                        }
                }
                kfree(mci->csrows);
        }
        kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks requires drivers to
 * change.
 *
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csr;
        struct rank_info *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, row, chn, n, len, off;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                edac_dbg(4, "errcount layer %d size %d\n", i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                 size,
                 tot_dimms,
                 per_rank ? "ranks" : "dimms",
                 tot_csrows * tot_channels);

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->csbased = per_rank;

        /*
         * Allocate and fill the csrow/channels structs
         */
        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
        if (!mci->csrows)
                goto error;
        for (row = 0; row < tot_csrows; row++) {
                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
                if (!csr)
                        goto error;
                mci->csrows[row] = csr;
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
                                        GFP_KERNEL);
                if (!csr->channels)
                        goto error;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
                        if (!chan)
                                goto error;
                        csr->channels[chn] = chan;
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Allocate and fill the dimm structs
         */
        mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
        if (!mci->dimms)
                goto error;

        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
        for (i = 0; i < tot_dimms; i++) {
                chan = mci->csrows[row]->channels[chn];
                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
                if (off < 0 || off >= tot_dimms) {
                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
                        goto error;
                }

                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
                if (!dimm)
                        goto error;
                mci->dimms[off] = dimm;
                dimm->mci = mci;

                /*
                 * Copy DIMM location and initialize it.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = snprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = snprintf(p, len, "%s#%u",
                                     edac_layer_name[layers[j].type],
                                     pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location */
                if (layers[0].is_virt_csrow) {
                        chn++;
                        if (chn == tot_channels) {
                                chn = 0;
                                row++;
                        }
                } else {
                        row++;
                        if (row == tot_csrows) {
                                row = 0;
                                chn++;
                        }
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;

        return mci;

error:
        _edac_mc_free(mci);

        return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

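/*
 * A usage sketch for edac_mc_alloc() (not compiled; the two-layer
 * geometry of 4 chip selects x 2 channels and the private struct are
 * hypothetical - a real driver reads its geometry from hardware).
 */
#if 0   /* illustrative only */
struct example_pvt {
        void __iomem *base;
};

static struct mem_ctl_info *example_alloc_mci(void)
{
        struct edac_mc_layer layers[2];

        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
        layers[0].size = 4;
        layers[0].is_virt_csrow = true;
        layers[1].type = EDAC_MC_LAYER_CHANNEL;
        layers[1].size = 2;
        layers[1].is_virt_csrow = false;

        return edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
                             sizeof(struct example_pvt));
}
#endif
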
/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        edac_dbg(1, "\n");

        /* If we're not yet registered with sysfs free only what was allocated
         * in edac_mc_alloc().
         */
        if (!device_is_registered(&mci->dev)) {
                _edac_mc_free(mci);
                return;
        }

        /* the mci instance is freed here, when the sysfs object is dropped */
        edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *      Scan the list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        edac_dbg(3, "\n");

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->pdev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

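/*
 * Usage sketch (not compiled; the caller is hypothetical): code that
 * only holds the underlying device can map it back to its controller.
 */
#if 0   /* illustrative only */
static void example_report_owner(struct device *dev)
{
        struct mem_ctl_info *mci = find_mci_by_dev(dev);

        if (mci)
                edac_dbg(0, "%s is handled by MC%d\n",
                         dev_name(dev), mci->mc_idx);
}
#endif
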
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        edac_dbg(0, "\n");

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                edac_dbg(0, "not canceled, flush the queue\n");

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->pdev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
        int handlers = atomic_dec_return(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);

        return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

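/*
 * Usage sketch (not compiled; the caller is hypothetical): probing for
 * a free controller index while holding mem_ctls_mutex, as the
 * kernel-doc above requires.
 */
#if 0   /* illustrative only */
static int example_first_free_idx(void)
{
        int idx = 0;

        mutex_lock(&mem_ctls_mutex);
        while (edac_mc_find(idx))
                idx++;
        mutex_unlock(&mem_ctls_mutex);

        return idx;
}
#endif
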
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        int ret = -EINVAL;

        edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        struct csrow_info *csrow = mci->csrows[i];
                        u32 nr_pages = 0;
                        int j;

                        for (j = 0; j < csrow->nr_channels; j++)
                                nr_pages += csrow->channels[j]->dimm->nr_pages;
                        if (!nr_pages)
                                continue;
                        edac_mc_dump_csrow(csrow);
                        for (j = 0; j < csrow->nr_channels; j++)
                                if (csrow->channels[j]->dimm->nr_pages)
                                        edac_mc_dump_channel(csrow->channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        if (mci->dimms[i]->nr_pages)
                                edac_mc_dump_dimm(mci->dimms[i], i);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
                ret = -EPERM;
                goto fail0;
        }

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        edac_mc_owner = mci->mod_name;

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

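/*
 * A sketch of the registration flow in a driver's probe routine (not
 * compiled; error handling abbreviated). It reuses the hypothetical
 * example_alloc_mci() sketched after edac_mc_alloc() above.
 */
#if 0   /* illustrative only */
static int example_probe(struct device *dev)
{
        struct mem_ctl_info *mci = example_alloc_mci();

        if (!mci)
                return -ENOMEM;

        mci->pdev = dev;
        mci->mod_name = "example_edac";
        mci->ctl_name = "example_ctl";
        /* leaving mci->edac_check NULL means OP_RUNNING_INTERRUPT */

        if (edac_mc_add_mc(mci)) {
                edac_mc_free(mci);
                return -ENODEV;
        }

        return 0;
}
#endif
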
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        edac_dbg(0, "\n");

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        if (!del_mc_from_global_list(mci))
                edac_mc_owner = NULL;
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

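/*
 * The matching teardown sketch (not compiled; the caller is
 * hypothetical): edac_mc_del_mc() unhooks the mci and returns it so
 * the driver can free it.
 */
#if 0   /* illustrative only */
static int example_remove(struct device *dev)
{
        struct mem_ctl_info *mci = edac_mc_del_mc(dev);

        if (mci)
                edac_mc_free(mci);

        return 0;
}
#endif
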
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        edac_dbg(3, "\n");

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info **csrows = mci->csrows;
        int row, i, j, n;

        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j]->dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
                         mci->mc_idx,
                         csrow->first_page, page, csrow->last_page,
                         csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
        [EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ce_mc += count;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS],
                                    const u16 count)
{
        int i, index = 0;

        mci->ue_mc += count;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          long grain)
{
        unsigned long remapped_page;
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MCs that can't do this lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s%son %s (%s %s - %s)\n",
                              msg, msg_aux, label, location, detail, other_detail);
                else
                        panic("UE %s%son %s (%s %s)\n",
                              msg, msg_aux, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *                            anything to discover the error location
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @e:                  error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from the
 * BIOS, as in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
                              struct mem_ctl_info *mci,
                              struct edac_raw_error_desc *e)
{
        char detail[80];
        int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
                        e->page_frame_number, e->offset_in_page,
                        e->grain, e->syndrome);
                edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report,
                              e->page_frame_number, e->offset_in_page, e->grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld",
                        e->page_frame_number, e->offset_in_page, e->grain);

                edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

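/*
 * A sketch of the raw path as a firmware-first consumer (such as the
 * APEI/GHES driver) might use it (not compiled; all field values are
 * placeholders): the error description arrives already decoded, so
 * the location lookup in edac_mc_handle_error() is skipped.
 */
#if 0   /* illustrative only */
static void example_report_raw_ce(struct mem_ctl_info *mci)
{
        struct edac_raw_error_desc *e = &mci->error_desc;

        memset(e, 0, sizeof(*e));
        e->error_count = 1;
        e->top_layer = e->mid_layer = e->low_layer = -1;
        e->msg = "firmware-reported error";
        e->other_detail = "";
        strcpy(e->label, "unknown memory");

        edac_raw_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, e);
}
#endif
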
/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @error_count:        Number of errors of the same type
 * @page_frame_number:  mem page where the error occurred
 * @offset_in_page:     offset of the error inside the page
 * @syndrome:           ECC syndrome
 * @top_layer:          Memory layer[0] position
 * @mid_layer:          Memory layer[1] position
 * @low_layer:          Memory layer[2] position
 * @msg:                Message meaningful to the end users that
 *                      explains the event
 * @other_detail:       Technical details about the event that
 *                      may help hardware manufacturers and
 *                      EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int top_layer,
                          const int mid_layer,
                          const int low_layer,
                          const char *msg,
                          const char *other_detail)
{
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
        int i, n_labels = 0;
        u8 grain_bits;
        struct edac_raw_error_desc *e = &mci->error_desc;

        edac_dbg(3, "MC%d\n", mci->mc_idx);

        /* Fills the error report buffer */
        memset(e, 0, sizeof(*e));
        e->error_count = error_count;
        e->top_layer = top_layer;
        e->mid_layer = mid_layer;
        e->low_layer = low_layer;
        e->page_frame_number = page_frame_number;
        e->offset_in_page = offset_in_page;
        e->syndrome = syndrome;
        e->msg = msg;
        e->other_detail = other_detail;

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {
                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning it, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing at the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        e->enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
        p = e->label;
        *p = '\0';

        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = mci->dimms[i];

                if (top_layer >= 0 && top_layer != dimm->location[0])
                        continue;
                if (mid_layer >= 0 && mid_layer != dimm->location[1])
                        continue;
                if (low_layer >= 0 && low_layer != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > e->grain)
                        e->grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * seek for the affected DIMMs because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (e->enable_per_layer_report && dimm->nr_pages) {
                        if (n_labels >= EDAC_MAX_LABELS) {
                                e->enable_per_layer_report = false;
                                break;
                        }
                        n_labels++;
                        if (p != e->label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
                                 mci->csbased ? "rank" : "dimm",
                                 dimm->csrow, dimm->cschannel);
                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!e->enable_per_layer_report) {
                strcpy(e->label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
                if (p == e->label)
                        strcpy(e->label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
                                if (chan >= 0)
                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
                        }
                } else
                        if (row >= 0)
                                mci->csrows[row]->ue_count += error_count;
        }

        /* Fill the RAM location data */
        p = e->location;

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
        if (p > e->location)
                *(p - 1) = '\0';

        /* Report the error via the trace interface */
        grain_bits = fls_long(e->grain) + 1;
        trace_mc_event(type, e->msg, e->label, e->error_count,
                       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
                       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
                       grain_bits, e->syndrome, e->other_detail);

        edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);

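/*
 * A hedged sketch (not compiled; the decoded values are hypothetical)
 * of how a driver's decoder might feed edac_mc_handle_error(): layers
 * the hardware cannot decode are passed as -1, and the core derives
 * the labels, counters and tracepoint payload.
 */
#if 0   /* illustrative only */
static void example_decode_and_report(struct mem_ctl_info *mci,
                                      unsigned long pfn,
                                      unsigned long offset,
                                      unsigned long syndrome,
                                      int csrow, int channel)
{
        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
                             pfn, offset, syndrome,
                             csrow, channel, -1,
                             "single-bit error", "");
}
#endif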