linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
{
        struct mem_ctl_info *mci = dimm->mci;
        int i, n, count = 0;
        char *p = buf;

        for (i = 0; i < mci->n_layers; i++) {
                n = snprintf(p, len, "%s %d ",
                              edac_layer_name[mci->layers[i].type],
                              dimm->location[i]);
                p += n;
                len -= n;
                count += n;
                if (!len)
                        break;
        }

        return count;
}
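
/*
 * For illustration: with the layer names defined in edac_layer_name[]
 * further below, a DIMM sitting at csrow 2, channel 1 of a hypothetical
 * two-layer controller yields the string "csrow 2 channel 1 " in buf,
 * and the number of characters written is returned.
 */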

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
        edac_dbg(4, "    channel = %p\n", chan);
        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
        char location[80];

        edac_dimm_info_location(dimm, location, sizeof(location));

        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
                 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
                 number, location, dimm->csrow, dimm->cschannel);
        edac_dbg(4, "  dimm = %p\n", dimm);
        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
        edac_dbg(4, "  csrow = %p\n", csrow);
        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        edac_dbg(3, "\tmci = %p\n", mci);
        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
                 mci->nr_csrows, mci->csrows);
        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
                 mci->tot_dimms, mci->dimms);
        edac_dbg(3, "\tdev = %p\n", mci->pdev);
        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
                 mci->mod_name, mci->ctl_name);
        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
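
/*
 * For illustration: this table is indexed by enum mem_type (see
 * <linux/edac.h>), so edac_mem_types[MEM_DDR3] evaluates to
 * "Unbuffered DDR3 RAM", assuming the enum order matches the list
 * above, as the comment requires.
 */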

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep advancing to the proper
 * offsets in memory when allocating a struct along with its embedded
 * structs, as edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'p' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'p' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        /* align the offset itself, not the address of the caller's pointer */
        r = (unsigned long)ptr % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}
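
/*
 * A minimal sketch (not code from this file) of the single-shot
 * allocation pattern this helper supports: offsets are computed against
 * a NULL base, the total size is allocated once, and each offset is then
 * rebased onto the real allocation. 'struct foo' and 'pvt_size' are
 * hypothetical names:
 *
 *      void *ptr = NULL, *pvt;
 *      struct foo *foo;
 *      unsigned size;
 *
 *      foo = edac_align_ptr(&ptr, sizeof(*foo), 1);    (offset 0)
 *      pvt = edac_align_ptr(&ptr, pvt_size, 1);        (aligned offset)
 *      size = ((unsigned long)pvt) + pvt_size;
 *
 *      foo = kzalloc(size, GFP_KERNEL);
 *      if (!foo)
 *              return NULL;
 *      pvt = pvt_size ? ((char *)foo) + ((unsigned long)pvt) : NULL;
 */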

static void _edac_mc_free(struct mem_ctl_info *mci)
{
        int i, chn, row;
        struct csrow_info *csr;
        const unsigned int tot_dimms = mci->tot_dimms;
        const unsigned int tot_channels = mci->num_cschannel;
        const unsigned int tot_csrows = mci->nr_csrows;

        if (mci->dimms) {
                for (i = 0; i < tot_dimms; i++)
                        kfree(mci->dimms[i]);
                kfree(mci->dimms);
        }
        if (mci->csrows) {
                for (row = 0; row < tot_csrows; row++) {
                        csr = mci->csrows[row];
                        if (csr) {
                                if (csr->channels) {
                                        for (chn = 0; chn < tot_channels; chn++)
                                                kfree(csr->channels[chn]);
                                        kfree(csr->channels);
                                }
                                kfree(csr);
                        }
                }
                kfree(mci->csrows);
        }
        kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires a change in
 * the drivers.
 *
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csr;
        struct rank_info *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, row, chn, n, len, off;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                edac_dbg(4, "errcount layer %d size %d\n", i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                 size,
                 tot_dimms,
                 per_rank ? "ranks" : "dimms",
                 tot_csrows * tot_channels);

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->mem_is_per_rank = per_rank;

        /*
         * Allocate and fill the csrow/channels structs
         */
        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
        if (!mci->csrows)
                goto error;
        for (row = 0; row < tot_csrows; row++) {
                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
                if (!csr)
                        goto error;
                mci->csrows[row] = csr;
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
                                        GFP_KERNEL);
                if (!csr->channels)
                        goto error;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
                        if (!chan)
                                goto error;
                        csr->channels[chn] = chan;
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Allocate and fill the dimm structs
         */
        mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
        if (!mci->dimms)
                goto error;

        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
        for (i = 0; i < tot_dimms; i++) {
                chan = mci->csrows[row]->channels[chn];
                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
                if (off < 0 || off >= tot_dimms) {
                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
                        goto error;
                }

                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
                if (!dimm)
                        goto error;
                mci->dimms[off] = dimm;
                dimm->mci = mci;

                /*
                 * Copy DIMM location and initialize it.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = snprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = snprintf(p, len, "%s#%u",
                                     edac_layer_name[layers[j].type],
                                     pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location: on csrow-based layouts the
                 * channel is the fastest-varying coordinate, matching the
                 * pos[] increment below; otherwise the csrow varies fastest.
                 */
                if (layers[0].is_virt_csrow) {
                        chn++;
                        if (chn == tot_channels) {
                                chn = 0;
                                row++;
                        }
                } else {
                        row++;
                        if (row == tot_csrows) {
                                row = 0;
                                chn++;
                        }
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;

        /* The structure is now fully allocated; once it has been registered
         * with sysfs, it must be released with edac_mc_free(), which defers
         * the actual free to the sysfs object release.
         */

        return mci;

error:
        _edac_mc_free(mci);

        return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
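
/*
 * Example usage (illustrative only, not taken from a real driver): a
 * hypothetical controller with 4 chip-select rows and 2 channels, plus a
 * made-up private struct 'my_pvt', would be allocated like this:
 *
 *      struct edac_mc_layer layers[2];
 *      struct mem_ctl_info *mci;
 *
 *      layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *      layers[0].size = 4;
 *      layers[0].is_virt_csrow = true;
 *      layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *      layers[1].size = 2;
 *      layers[1].is_virt_csrow = false;
 *
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *      if (!mci)
 *              return -ENOMEM;
 */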

/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        edac_dbg(1, "\n");

        /* If we're not yet registered with sysfs free only what was allocated
         * in edac_mc_alloc().
         */
        if (!device_is_registered(&mci->dev)) {
                _edac_mc_free(mci);
                return;
        }

        /* the mci instance is freed here, when the sysfs object is dropped */
        edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *      scan list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        edac_dbg(3, "\n");

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->pdev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        edac_dbg(0, "\n");

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                edac_dbg(0, "not canceled, flush the queue\n");

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        /* scan the list and turn off all workq timers, doing so under lock */
        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work(&mci->work);
        }

        mutex_unlock(&mem_ctls_mutex);


        /* re-walk the list, and reset the poll delay */
        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}



/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->pdev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);
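
/*
 * Illustrative lookup, with the caller holding mem_ctls_mutex as required
 * above:
 *
 *      struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *      if (mci)
 *              ...operate on memory controller 0...
 */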

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        struct csrow_info *csrow = mci->csrows[i];
                        u32 nr_pages = 0;
                        int j;

                        for (j = 0; j < csrow->nr_channels; j++)
                                nr_pages += csrow->channels[j]->dimm->nr_pages;
                        if (!nr_pages)
                                continue;
                        edac_mc_dump_csrow(csrow);
                        for (j = 0; j < csrow->nr_channels; j++)
                                if (csrow->channels[j]->dimm->nr_pages)
                                        edac_mc_dump_channel(csrow->channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        if (mci->dimms[i]->nr_pages)
                                edac_mc_dump_dimm(mci->dimms[i], i);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
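
/*
 * Sketch of the usual registration sequence in a driver probe routine
 * ('priv' and 'my_check' are hypothetical names, and the error handling
 * is reduced to the minimum):
 *
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*priv));
 *      if (!mci)
 *              return -ENOMEM;
 *
 *      mci->pdev = &pdev->dev;
 *      mci->mod_name = "my_edac_module";
 *      mci->ctl_name = "my_controller";
 *      mci->edac_check = my_check;     (selects OP_RUNNING_POLL above)
 *
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 */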

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        edac_dbg(0, "\n");

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
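
/*
 * Matching teardown sketch for a driver remove routine ('pdev' being the
 * device that was stored in mci->pdev at probe time):
 *
 *      struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *      if (mci)
 *              edac_mc_free(mci);
 */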

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        edac_dbg(3, "\n");

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info **csrows = mci->csrows;
        int row, i, j, n;

        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j]->dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
                         mci->mc_idx,
                         csrow->first_page, page, csrow->last_page,
                         csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
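
/*
 * Worked example for the range/mask test above (made-up numbers): with
 * first_page = 0x1000, last_page = 0x1fff and page_mask = 0xf000, a
 * reported page 0x1a42 matches, since it lies within [first_page,
 * last_page] and (0x1a42 & 0xf000) == (first_page & page_mask) == 0x1000.
 */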

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

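/*
 * The two counter helpers below flatten the layer position into a
 * row-major index per layer. For illustration (made-up sizes): with
 * three layers of sizes {2, 4, 2} and pos = {1, 2, 0}, they update
 * index 1 at layer 0, index 1 * 4 + 2 = 6 at layer 1, and index
 * 6 * 2 + 0 = 12 at layer 2.
 */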
static void edac_inc_ce_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ce_mc += count;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS],
                                    const u16 count)
{
        int i, index = 0;

        mci->ue_mc += count;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          long grain)
{
        unsigned long remapped_page;

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s on %s (%s %s - %s)\n",
                                       error_count,
                                       msg, label, location,
                                       detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s on %s (%s %s)\n",
                                       error_count,
                                       msg, label, location,
                                       detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MCs that can't do this lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s on %s (%s %s - %s)\n",
                                       error_count,
                                       msg, label, location, detail,
                                       other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s on %s (%s %s)\n",
                                       error_count,
                                       msg, label, location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s on %s (%s %s - %s)\n",
                              msg, label, location, detail, other_detail);
                else
                        panic("UE %s on %s (%s %s)\n",
                              msg, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

#define OTHER_LABEL " or "

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @error_count:        Number of errors of the same type
 * @page_frame_number:  mem page where the error occurred
 * @offset_in_page:     offset of the error inside the page
 * @syndrome:           ECC syndrome
 * @top_layer:          Memory layer[0] position
 * @mid_layer:          Memory layer[1] position
 * @low_layer:          Memory layer[2] position
 * @msg:                Message meaningful to the end users that
 *                      explains the event
 * @other_detail:       Technical details about the event that
 *                      may help hardware manufacturers and
 *                      EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int top_layer,
                          const int mid_layer,
                          const int low_layer,
                          const char *msg,
                          const char *other_detail)
{
        /* FIXME: too much for stack: move it to some pre-allocated area */
        char detail[80], location[80];
        char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
        int i;
        long grain;
        bool enable_per_layer_report = false;
        u8 grain_bits;

        edac_dbg(3, "MC%d\n", mci->mc_idx);

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {
                        if (type == HW_EVENT_ERR_CORRECTED)
                                p = "CE";
                        else
                                p = "UE";

                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning it, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing at the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory stick belongs to a separate channel within the
         * same branch.
         */
        grain = 0;
        p = label;
        *p = '\0';
        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = mci->dimms[i];

                if (top_layer >= 0 && top_layer != dimm->location[0])
                        continue;
                if (mid_layer >= 0 && mid_layer != dimm->location[1])
                        continue;
                if (low_layer >= 0 && low_layer != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > grain)
                        grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * seek for the affected DIMMs because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (enable_per_layer_report && dimm->nr_pages) {
                        if (p != label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
                                 mci->mem_is_per_rank ? "rank" : "dimm",
                                 dimm->csrow, dimm->cschannel);
                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!enable_per_layer_report) {
                strcpy(label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
                if (p == label)
                        strcpy(label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
                                if (chan >= 0)
                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
                        }
                } else
                        if (row >= 0)
                                mci->csrows[row]->ue_count += error_count;
        }

        /* Fill the RAM location data */
        p = location;
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
        if (p > location)
                *(p - 1) = '\0';

        /* Report the error via the trace interface */

        grain_bits = fls_long(grain) + 1;
        trace_mc_event(type, msg, label, error_count,
                       mci->mc_idx, top_layer, mid_layer, low_layer,
                       PAGES_TO_MiB(page_frame_number) | offset_in_page,
                       grain_bits, syndrome, other_detail);

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
                        page_frame_number, offset_in_page,
                        grain, syndrome);
                edac_ce_error(mci, error_count, pos, msg, location, label,
                              detail, other_detail, enable_per_layer_report,
                              page_frame_number, offset_in_page, grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld",
                        page_frame_number, offset_in_page, grain);

                edac_ue_error(mci, error_count, pos, msg, location, label,
                              detail, other_detail, enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
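
/*
 * Example report (illustrative values): a driver that decoded one
 * corrected error at layer positions (1, 0), page 0x1234, offset 0x56,
 * with no third layer, could call:
 *
 *      edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *                           0x1234, 0x56, syndrome,
 *                           1, 0, -1,
 *                           "read error", "");
 *
 * A layer position that the hardware cannot resolve is passed as -1; the
 * code above then attributes the error to every DIMM matching the
 * positions that are known.
 */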