linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, preventing two drivers (e.g.
 * apei/ghes and i7core_edac) from being used at the same time.
 */
static void const *edac_mc_owner;

static struct bus_type mc_bus[EDAC_MAX_MCS];

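/*
 * edac_dimm_info_location - format a DIMM's position across the MC
 * hierarchy layers into @buf, e.g. "channel 1 slot 0 ", and return the
 * number of characters written.
 */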
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			      edac_layer_name[mci->layers[i].type],
			      dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->csbased ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets when
 * allocating a struct along with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
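 *
 * A minimal usage sketch (hypothetical driver code, mirroring what
 * edac_mc_alloc() below does): compute offsets against a NULL base,
 * allocate the total size once, then relocate each pointer into the
 * real allocation:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), n_foos);
 *	base = kzalloc((unsigned long)ptr, GFP_KERNEL);
 *	foo = (struct foo *)((char *)base + (unsigned long)foo);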
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/* align on the current offset value, not on the address of 'p' */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

static void _edac_mc_free(struct mem_ctl_info *mci)
{
	int i, chn, row;
	struct csrow_info *csr;
	const unsigned int tot_dimms = mci->tot_dimms;
	const unsigned int tot_channels = mci->num_cschannel;
	const unsigned int tot_csrows = mci->nr_csrows;

	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (row = 0; row < tot_csrows; row++) {
			csr = mci->csrows[row];
			if (csr) {
				if (csr->channels) {
					for (chn = 0; chn < tot_channels; chn++)
						kfree(csr->channels[chn]);
					kfree(csr->channels);
				}
				kfree(csr);
			}
		}
		kfree(mci->csrows);
	}
	kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires driver changes.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
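 *
 * A hypothetical two-layer setup (illustrative names, not from this
 * file) could look like:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = nr_channels;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = nr_slots;
 *	layers[1].is_virt_csrow = true;
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct foo_pvt));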
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
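		/*
		 * EDAC_DIMM_OFF() linearizes the current (up to
		 * three-dimensional) layer position in pos[] into an
		 * index into the flat dimms[] array.
		 */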
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (layers[0].is_virt_csrow) {
			chn++;
			if (chn == tot_channels) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == tot_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free - 'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* If we're not yet registered with sysfs free only what was allocated
	 * in edac_mc_alloc().
	 */
	if (!device_is_registered(&mci->dev)) {
		_edac_mc_free(mci);
		return;
	}

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev - scan the list of controllers looking for the one
 *	that manages the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		edac_dbg(0, "not canceled, flush the queue\n");

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	int handlers = atomic_dec_return(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *		   create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
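 *
 * A typical driver probe sequence (sketch with hypothetical names)
 * allocates and fills the mci before registering it:
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct foo_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->pdev = &pdev->dev;
 *	mci->mod_name = "foo_edac";
 *	mci->ctl_name = "foo";
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}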
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

	if (mci->mc_idx >= EDAC_MAX_MCS) {
		pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
		return -ENODEV;
	}

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	mci->bus = &mc_bus[mci->mc_idx];

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *		   remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	if (!del_mc_from_global_list(mci))
		edac_mc_owner = NULL;
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ce_mc += count;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count += count;
		return;
	}

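	/*
	 * Walk the layers in row-major order: after each layer the
	 * partial index is scaled by the size of the next layer, so
	 * for a three-layer hierarchy index ends up as
	 * (pos[0] * size[1] + pos[1]) * size[2] + pos[2].
	 */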
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
				    bool enable_per_layer_report,
				    const int pos[EDAC_MAX_LAYERS],
				    const u16 count)
{
	int i, index = 0;

	mci->ue_mc += count;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s%son %s (%s%s - %s)\n",
			      msg, msg_aux, label, location, detail, other_detail);
		else
			panic("UE %s%son %s (%s%s)\n",
			      msg, msg_aux, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *			      anything to discover the error location
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @e:			error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * as in the case of the APEI GHES driver.
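 *
 * A caller fills the descriptor embedded in the mci first; a minimal,
 * illustrative sketch (field values are placeholders):
 *
 *	struct edac_raw_error_desc *e = &mci->error_desc;
 *
 *	memset(e, 0, sizeof(*e));
 *	e->error_count = 1;
 *	e->grain = 1;
 *	e->msg = "memory read error";
 *	edac_raw_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, e);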
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e)
{
	char detail[80];
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome);
		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report,
			      e->page_frame_number, e->offset_in_page, e->grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			e->page_frame_number, e->offset_in_page, e->grain);

		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @error_count:	Number of errors of the same type
 * @page_frame_number:	mem page where the error occurred
 * @offset_in_page:	offset of the error inside the page
 * @syndrome:		ECC syndrome
 * @top_layer:		Memory layer[0] position
 * @mid_layer:		Memory layer[1] position
 * @low_layer:		Memory layer[2] position
 * @msg:		Message meaningful to the end users that
 *			explains the event
 * @other_detail:	Technical details about the event that
 *			may help hardware manufacturers and
 *			EDAC developers to analyse the event
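 *
 * For instance, a driver that found a corrected error on channel 1,
 * slot 0 might report it like this (illustrative values only; a
 * negative layer position means "unknown"):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     pfn, offset, syndrome,
 *			     1, 0, -1,
 *			     "read error", "");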
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	u8 grain_bits;
	struct edac_raw_error_desc *e = &mci->error_desc;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof(*e));
	e->error_count = error_count;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	e->msg = msg;
	e->other_detail = other_detail;

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			e->enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/...  may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (e->enable_per_layer_report && dimm->nr_pages) {
			if (n_labels >= EDAC_MAX_LABELS) {
				e->enable_per_layer_report = false;
				break;
			}
			n_labels++;
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->csbased ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);
			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!e->enable_per_layer_report) {
		strcpy(e->label, "any memory");
	} else {
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
		if (p == e->label)
			strcpy(e->label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row]->ce_count += error_count;
				if (chan >= 0)
					mci->csrows[row]->channels[chan]->ce_count += error_count;
			}
		} else
			if (row >= 0)
				mci->csrows[row]->ue_count += error_count;
	}

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > e->location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */
	grain_bits = fls_long(e->grain) + 1;
	trace_mc_event(type, e->msg, e->label, e->error_count,
		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
		       grain_bits, e->syndrome, e->other_detail);

	edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);