linux/arch/x86/kernel/cpu/mtrr/main.c
   1/*  Generic MTRR (Memory Type Range Register) driver.
   2
   3    Copyright (C) 1997-2000  Richard Gooch
   4    Copyright (c) 2002       Patrick Mochel
   5
   6    This library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public
   8    License as published by the Free Software Foundation; either
   9    version 2 of the License, or (at your option) any later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with this library; if not, write to the Free
  18    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
  21    The postal address is:
  22      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
  23
  24    Source: "Pentium Pro Family Developer's Manual, Volume 3:
  25    Operating System Writer's Guide" (Intel document number 242692),
  26    section 11.11.7
  27
  28    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 
  29    on 6-7 March 2002. 
  30    Source: Intel Architecture Software Developers Manual, Volume 3: 
  31    System Programming Guide; Section 9.11. (1997 edition - PPro).
  32*/
  33
  34#include <linux/module.h>
  35#include <linux/init.h>
  36#include <linux/pci.h>
  37#include <linux/smp.h>
  38#include <linux/cpu.h>
  39#include <linux/mutex.h>
  40#include <linux/sort.h>
  41
  42#include <asm/e820.h>
  43#include <asm/mtrr.h>
  44#include <asm/uaccess.h>
  45#include <asm/processor.h>
  46#include <asm/msr.h>
  47#include <asm/kvm_para.h>
  48#include "mtrr.h"
  49
  50u32 num_var_ranges = 0;
  51
  52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
  53static DEFINE_MUTEX(mtrr_mutex);
  54
  55u64 size_or_mask, size_and_mask;
  56
  57static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
  58
  59struct mtrr_ops * mtrr_if = NULL;
  60
  61static void set_mtrr(unsigned int reg, unsigned long base,
  62                     unsigned long size, mtrr_type type);
  63
  64void set_mtrr_ops(struct mtrr_ops * ops)
  65{
  66        if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
  67                mtrr_ops[ops->vendor] = ops;
  68}
  69
  70/*  Returns non-zero if we have the write-combining memory type  */
  71static int have_wrcomb(void)
  72{
  73        struct pci_dev *dev;
  74        u8 rev;
  75        
  76        if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
  77                /* ServerWorks LE chipsets < rev 6 have problems with write-combining
  78                   Don't allow it and leave room for other chipsets to be tagged */
  79                if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
  80                    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
  81                        pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
  82                        if (rev <= 5) {
  83                                printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
  84                                pci_dev_put(dev);
  85                                return 0;
  86                        }
  87                }
   88                /* Intel 450NX errata # 23. Non-ascending cacheline evictions to
   89                   write combining memory may result in data corruption */
  90                if (dev->vendor == PCI_VENDOR_ID_INTEL &&
  91                    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
  92                        printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
  93                        pci_dev_put(dev);
  94                        return 0;
  95                }
  96                pci_dev_put(dev);
  97        }               
  98        return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
  99}
 100
  101/*  This function sets num_var_ranges to the number of variable MTRRs  */
 102static void __init set_num_var_ranges(void)
 103{
 104        unsigned long config = 0, dummy;
 105
 106        if (use_intel()) {
 107                rdmsr(MTRRcap_MSR, config, dummy);
 108        } else if (is_cpu(AMD))
 109                config = 2;
 110        else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
 111                config = 8;
 112        num_var_ranges = config & 0xff;
 113}
 114
 115static void __init init_table(void)
 116{
 117        int i, max;
 118
 119        max = num_var_ranges;
 120        for (i = 0; i < max; i++)
 121                mtrr_usage_table[i] = 1;
 122}
 123
 124struct set_mtrr_data {
 125        atomic_t        count;
 126        atomic_t        gate;
 127        unsigned long   smp_base;
 128        unsigned long   smp_size;
 129        unsigned int    smp_reg;
 130        mtrr_type       smp_type;
 131};
 132
 133static void ipi_handler(void *info)
 134/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
 135    [RETURNS] Nothing.
 136*/
 137{
 138#ifdef CONFIG_SMP
 139        struct set_mtrr_data *data = info;
 140        unsigned long flags;
 141
 142        local_irq_save(flags);
 143
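             /* Tell the master this CPU has interrupts off,
              * then wait for the gate to open.  */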
 144        atomic_dec(&data->count);
 145        while(!atomic_read(&data->gate))
 146                cpu_relax();
 147
 148        /*  The master has cleared me to execute  */
 149        if (data->smp_reg != ~0U) 
 150                mtrr_if->set(data->smp_reg, data->smp_base, 
 151                             data->smp_size, data->smp_type);
 152        else
 153                mtrr_if->set_all();
 154
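             /* Report that the MTRR update is done here,
              * then wait for the master to close the gate.  */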
 155        atomic_dec(&data->count);
 156        while(atomic_read(&data->gate))
 157                cpu_relax();
 158
 159        atomic_dec(&data->count);
 160        local_irq_restore(flags);
 161#endif
 162}
 163
 164static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
 165        return type1 == MTRR_TYPE_UNCACHABLE ||
 166               type2 == MTRR_TYPE_UNCACHABLE ||
 167               (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
 168               (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
 169}
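     /*
      * For example, a new MTRR_TYPE_WRTHROUGH request may overlap an existing
      * MTRR_TYPE_WRBACK region (write-through is the stricter, effective type),
      * and MTRR_TYPE_UNCACHABLE is compatible with everything because an
      * uncachable range always takes precedence.
      */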
 170
 171/**
 172 * set_mtrr - update mtrrs on all processors
 173 * @reg:        mtrr in question
 174 * @base:       mtrr base
 175 * @size:       mtrr size
 176 * @type:       mtrr type
 177 *
 178 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 179 * 
 180 * 1. Send IPI to do the following:
 181 * 2. Disable Interrupts
 182 * 3. Wait for all procs to do so 
 183 * 4. Enter no-fill cache mode
 184 * 5. Flush caches
 185 * 6. Clear PGE bit
 186 * 7. Flush all TLBs
 187 * 8. Disable all range registers
 188 * 9. Update the MTRRs
 189 * 10. Enable all range registers
 190 * 11. Flush all TLBs and caches again
 191 * 12. Enter normal cache mode and reenable caching
 192 * 13. Set PGE 
 193 * 14. Wait for buddies to catch up
 194 * 15. Enable interrupts.
 195 * 
 196 * What does that mean for us? Well, first we set data.count to the number
 197 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
 198 * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
 199 * Meanwhile, they are waiting for that flag to be set. Once it's set, each 
 200 * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it 
 201 * differently, so we call mtrr_if->set() callback and let them take care of it.
 202 * When they're done, they again decrement data->count and wait for data.gate to 
 203 * be reset. 
 204 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
 205 * Everyone then enables interrupts and we all continue on.
 206 *
 207 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 208 * becomes nops.
 209 */
 210static void set_mtrr(unsigned int reg, unsigned long base,
 211                     unsigned long size, mtrr_type type)
 212{
 213        struct set_mtrr_data data;
 214        unsigned long flags;
 215
 216        data.smp_reg = reg;
 217        data.smp_base = base;
 218        data.smp_size = size;
 219        data.smp_type = type;
 220        atomic_set(&data.count, num_booting_cpus() - 1);
 221        /* make sure data.count is visible before unleashing other CPUs */
 222        smp_wmb();
 223        atomic_set(&data.gate,0);
 224
 225        /*  Start the ball rolling on other CPUs  */
 226        if (smp_call_function(ipi_handler, &data, 0) != 0)
 227                panic("mtrr: timed out waiting for other CPUs\n");
 228
 229        local_irq_save(flags);
 230
 231        while(atomic_read(&data.count))
 232                cpu_relax();
 233
 234        /* ok, reset count and toggle gate */
 235        atomic_set(&data.count, num_booting_cpus() - 1);
 236        smp_wmb();
 237        atomic_set(&data.gate,1);
 238
 239        /* do our MTRR business */
 240
 241        /* HACK!
 242         * We use this same function to initialize the mtrrs on boot.
 243         * The state of the boot cpu's mtrrs has been saved, and we want
  244         * to replicate it across all the APs.
 245         * If we're doing that @reg is set to something special...
 246         */
 247        if (reg != ~0U) 
 248                mtrr_if->set(reg,base,size,type);
 249
 250        /* wait for the others */
 251        while(atomic_read(&data.count))
 252                cpu_relax();
 253
 254        atomic_set(&data.count, num_booting_cpus() - 1);
 255        smp_wmb();
 256        atomic_set(&data.gate,0);
 257
 258        /*
 259         * Wait here for everyone to have seen the gate change
 260         * So we're the last ones to touch 'data'
 261         */
 262        while(atomic_read(&data.count))
 263                cpu_relax();
 264
 265        local_irq_restore(flags);
 266}
 267
 268/**
 269 *      mtrr_add_page - Add a memory type region
 270 *      @base: Physical base address of region in pages (in units of 4 kB!)
 271 *      @size: Physical size of region in pages (4 kB)
 272 *      @type: Type of MTRR desired
 273 *      @increment: If this is true do usage counting on the region
 274 *
 275 *      Memory type region registers control the caching on newer Intel and
  276 *      non Intel processors. This function allows drivers to request that
  277 *      an MTRR be added. The details and hardware specifics of each processor's
 278 *      implementation are hidden from the caller, but nevertheless the 
 279 *      caller should expect to need to provide a power of two size on an
 280 *      equivalent power of two boundary.
 281 *
 282 *      If the region cannot be added either because all regions are in use
 283 *      or the CPU cannot support it a negative value is returned. On success
 284 *      the register number for this entry is returned, but should be treated
 285 *      as a cookie only.
 286 *
 287 *      On a multiprocessor machine the changes are made to all processors.
 288 *      This is required on x86 by the Intel processors.
 289 *
 290 *      The available types are
 291 *
 292 *      %MTRR_TYPE_UNCACHABLE   -       No caching
 293 *
 294 *      %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
 295 *
 296 *      %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
 297 *
 298 *      %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
 299 *
 300 *      BUGS: Needs a quiet flag for the cases where drivers do not mind
 301 *      failures and do not wish system log messages to be sent.
 302 */
 303
 304int mtrr_add_page(unsigned long base, unsigned long size, 
 305                  unsigned int type, bool increment)
 306{
 307        int i, replace, error;
 308        mtrr_type ltype;
 309        unsigned long lbase, lsize;
 310
 311        if (!mtrr_if)
 312                return -ENXIO;
 313                
 314        if ((error = mtrr_if->validate_add_page(base,size,type)))
 315                return error;
 316
 317        if (type >= MTRR_NUM_TYPES) {
 318                printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
 319                return -EINVAL;
 320        }
 321
 322        /*  If the type is WC, check that this processor supports it  */
 323        if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
 324                printk(KERN_WARNING
 325                       "mtrr: your processor doesn't support write-combining\n");
 326                return -ENOSYS;
 327        }
 328
 329        if (!size) {
 330                printk(KERN_WARNING "mtrr: zero sized request\n");
 331                return -EINVAL;
 332        }
 333
 334        if (base & size_or_mask || size & size_or_mask) {
 335                printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
 336                return -EINVAL;
 337        }
 338
 339        error = -EINVAL;
 340        replace = -1;
 341
 342        /* No CPU hotplug when we change MTRR entries */
 343        get_online_cpus();
 344        /*  Search for existing MTRR  */
 345        mutex_lock(&mtrr_mutex);
 346        for (i = 0; i < num_var_ranges; ++i) {
 347                mtrr_if->get(i, &lbase, &lsize, &ltype);
 348                if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
 349                        continue;
 350                /*  At this point we know there is some kind of overlap/enclosure  */
 351                if (base < lbase || base + size - 1 > lbase + lsize - 1) {
 352                        if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
 353                                /*  New region encloses an existing region  */
 354                                if (type == ltype) {
 355                                        replace = replace == -1 ? i : -2;
 356                                        continue;
 357                                }
 358                                else if (types_compatible(type, ltype))
 359                                        continue;
 360                        }
 361                        printk(KERN_WARNING
 362                               "mtrr: 0x%lx000,0x%lx000 overlaps existing"
 363                               " 0x%lx000,0x%lx000\n", base, size, lbase,
 364                               lsize);
 365                        goto out;
 366                }
 367                /*  New region is enclosed by an existing region  */
 368                if (ltype != type) {
 369                        if (types_compatible(type, ltype))
 370                                continue;
 371                        printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
 372                             base, size, mtrr_attrib_to_str(ltype),
 373                             mtrr_attrib_to_str(type));
 374                        goto out;
 375                }
 376                if (increment)
 377                        ++mtrr_usage_table[i];
 378                error = i;
 379                goto out;
 380        }
 381        /*  Search for an empty MTRR  */
 382        i = mtrr_if->get_free_region(base, size, replace);
 383        if (i >= 0) {
 384                set_mtrr(i, base, size, type);
 385                if (likely(replace < 0)) {
 386                        mtrr_usage_table[i] = 1;
 387                } else {
 388                        mtrr_usage_table[i] = mtrr_usage_table[replace];
 389                        if (increment)
 390                                mtrr_usage_table[i]++;
 391                        if (unlikely(replace != i)) {
 392                                set_mtrr(replace, 0, 0, 0);
 393                                mtrr_usage_table[replace] = 0;
 394                        }
 395                }
 396        } else
 397                printk(KERN_INFO "mtrr: no more MTRRs available\n");
 398        error = i;
 399 out:
 400        mutex_unlock(&mtrr_mutex);
 401        put_online_cpus();
 402        return error;
 403}
 404
 405static int mtrr_check(unsigned long base, unsigned long size)
 406{
 407        if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
 408                printk(KERN_WARNING
 409                        "mtrr: size and base must be multiples of 4 kiB\n");
 410                printk(KERN_DEBUG
 411                        "mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 412                dump_stack();
 413                return -1;
 414        }
 415        return 0;
 416}
 417
 418/**
 419 *      mtrr_add - Add a memory type region
 420 *      @base: Physical base address of region
 421 *      @size: Physical size of region
 422 *      @type: Type of MTRR desired
 423 *      @increment: If this is true do usage counting on the region
 424 *
 425 *      Memory type region registers control the caching on newer Intel and
  426 *      non Intel processors. This function allows drivers to request that
  427 *      an MTRR be added. The details and hardware specifics of each processor's
 428 *      implementation are hidden from the caller, but nevertheless the 
 429 *      caller should expect to need to provide a power of two size on an
 430 *      equivalent power of two boundary.
 431 *
 432 *      If the region cannot be added either because all regions are in use
 433 *      or the CPU cannot support it a negative value is returned. On success
 434 *      the register number for this entry is returned, but should be treated
 435 *      as a cookie only.
 436 *
 437 *      On a multiprocessor machine the changes are made to all processors.
 438 *      This is required on x86 by the Intel processors.
 439 *
 440 *      The available types are
 441 *
 442 *      %MTRR_TYPE_UNCACHABLE   -       No caching
 443 *
 444 *      %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
 445 *
 446 *      %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
 447 *
 448 *      %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
 449 *
 450 *      BUGS: Needs a quiet flag for the cases where drivers do not mind
 451 *      failures and do not wish system log messages to be sent.
 452 */
 453
 454int
 455mtrr_add(unsigned long base, unsigned long size, unsigned int type,
 456         bool increment)
 457{
 458        if (mtrr_check(base, size))
 459                return -EINVAL;
 460        return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
 461                             increment);
 462}
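     /*
      * Illustrative sketch of a typical caller (e.g. a framebuffer driver)
      * requesting write-combining for its aperture; the physical base
      * address and size here are hypothetical:
      *
      *     int reg = mtrr_add(0xd0000000UL, 0x1000000UL, MTRR_TYPE_WRCOMB, true);
      *     if (reg < 0)
      *             return reg;     failed: -EINVAL, -ENOSYS, ...
      *     ... use the mapping ...
      *     mtrr_del(reg, 0xd0000000UL, 0x1000000UL);
      */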
 463
 464/**
 465 *      mtrr_del_page - delete a memory type region
 466 *      @reg: Register returned by mtrr_add
 467 *      @base: Physical base address
 468 *      @size: Size of region
 469 *
 470 *      If register is supplied then base and size are ignored. This is
 471 *      how drivers should call it.
 472 *
 473 *      Releases an MTRR region. If the usage count drops to zero the 
 474 *      register is freed and the region returns to default state.
 475 *      On success the register is returned, on failure a negative error
  476 *      code is returned.
 477 */
 478
 479int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 480{
 481        int i, max;
 482        mtrr_type ltype;
 483        unsigned long lbase, lsize;
 484        int error = -EINVAL;
 485
 486        if (!mtrr_if)
 487                return -ENXIO;
 488
 489        max = num_var_ranges;
 490        /* No CPU hotplug when we change MTRR entries */
 491        get_online_cpus();
 492        mutex_lock(&mtrr_mutex);
 493        if (reg < 0) {
 494                /*  Search for existing MTRR  */
 495                for (i = 0; i < max; ++i) {
 496                        mtrr_if->get(i, &lbase, &lsize, &ltype);
 497                        if (lbase == base && lsize == size) {
 498                                reg = i;
 499                                break;
 500                        }
 501                }
 502                if (reg < 0) {
 503                        printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
 504                               size);
 505                        goto out;
 506                }
 507        }
 508        if (reg >= max) {
 509                printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
 510                goto out;
 511        }
 512        mtrr_if->get(reg, &lbase, &lsize, &ltype);
 513        if (lsize < 1) {
 514                printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
 515                goto out;
 516        }
 517        if (mtrr_usage_table[reg] < 1) {
 518                printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
 519                goto out;
 520        }
 521        if (--mtrr_usage_table[reg] < 1)
 522                set_mtrr(reg, 0, 0, 0);
 523        error = reg;
 524 out:
 525        mutex_unlock(&mtrr_mutex);
 526        put_online_cpus();
 527        return error;
 528}
 529/**
 530 *      mtrr_del - delete a memory type region
 531 *      @reg: Register returned by mtrr_add
 532 *      @base: Physical base address
 533 *      @size: Size of region
 534 *
 535 *      If register is supplied then base and size are ignored. This is
 536 *      how drivers should call it.
 537 *
 538 *      Releases an MTRR region. If the usage count drops to zero the 
 539 *      register is freed and the region returns to default state.
 540 *      On success the register is returned, on failure a negative error
  541 *      code is returned.
 542 */
 543
 544int
 545mtrr_del(int reg, unsigned long base, unsigned long size)
 546{
 547        if (mtrr_check(base, size))
 548                return -EINVAL;
 549        return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
 550}
 551
 552EXPORT_SYMBOL(mtrr_add);
 553EXPORT_SYMBOL(mtrr_del);
 554
 555/* HACK ALERT!
 556 * These should be called implicitly, but we can't yet until all the initcall
 557 * stuff is done...
 558 */
 559static void __init init_ifs(void)
 560{
 561#ifndef CONFIG_X86_64
 562        amd_init_mtrr();
 563        cyrix_init_mtrr();
 564        centaur_init_mtrr();
 565#endif
 566}
 567
  568/* The suspend/resume methods are only for CPUs without MTRR. CPUs using the generic
  569 * MTRR driver don't require this.
 570 */
 571struct mtrr_value {
 572        mtrr_type       ltype;
 573        unsigned long   lbase;
 574        unsigned long   lsize;
 575};
 576
 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
 578
 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 580{
 581        int i;
 582
 583        for (i = 0; i < num_var_ranges; i++) {
 584                mtrr_if->get(i,
 585                             &mtrr_state[i].lbase,
 586                             &mtrr_state[i].lsize,
 587                             &mtrr_state[i].ltype);
 588        }
 589        return 0;
 590}
 591
 592static int mtrr_restore(struct sys_device * sysdev)
 593{
 594        int i;
 595
 596        for (i = 0; i < num_var_ranges; i++) {
 597                if (mtrr_state[i].lsize) 
 598                        set_mtrr(i,
 599                                 mtrr_state[i].lbase,
 600                                 mtrr_state[i].lsize,
 601                                 mtrr_state[i].ltype);
 602        }
 603        return 0;
 604}
 605
 606
 607
 608static struct sysdev_driver mtrr_sysdev_driver = {
 609        .suspend        = mtrr_save,
 610        .resume         = mtrr_restore,
 611};
 612
  613/* should be large enough for MTRR_MAX_VAR_RANGES entries */
 614#define RANGE_NUM 256
 615
 616struct res_range {
 617        unsigned long start;
 618        unsigned long end;
 619};
 620
 621static int __init
 622add_range(struct res_range *range, int nr_range, unsigned long start,
 623                              unsigned long end)
 624{
 625        /* out of slots */
 626        if (nr_range >= RANGE_NUM)
 627                return nr_range;
 628
 629        range[nr_range].start = start;
 630        range[nr_range].end = end;
 631
 632        nr_range++;
 633
 634        return nr_range;
 635}
 636
 637static int __init
 638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
 639                              unsigned long end)
 640{
 641        int i;
 642
  643        /* try to merge with an existing range */
 644        for (i = 0; i < nr_range; i++) {
 645                unsigned long final_start, final_end;
 646                unsigned long common_start, common_end;
 647
 648                if (!range[i].end)
 649                        continue;
 650
 651                common_start = max(range[i].start, start);
 652                common_end = min(range[i].end, end);
 653                if (common_start > common_end + 1)
 654                        continue;
 655
 656                final_start = min(range[i].start, start);
 657                final_end = max(range[i].end, end);
 658
 659                range[i].start = final_start;
 660                range[i].end =  final_end;
 661                return nr_range;
 662        }
 663
  664        /* could not merge, add a new range */
 665        return add_range(range, nr_range, start, end);
 666}
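     /*
      * For example, with an existing entry covering PFNs 0x100-0x1ff, adding
      * 0x200-0x2ff via add_range_with_merge() extends that entry to
      * 0x100-0x2ff: adjacent ranges (common_start == common_end + 1) are
      * merged rather than put in a new slot.
      */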
 667
 668static void __init
 669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
 670{
 671        int i, j;
 672
 673        for (j = 0; j < RANGE_NUM; j++) {
 674                if (!range[j].end)
 675                        continue;
 676
 677                if (start <= range[j].start && end >= range[j].end) {
 678                        range[j].start = 0;
 679                        range[j].end = 0;
 680                        continue;
 681                }
 682
 683                if (start <= range[j].start && end < range[j].end &&
 684                    range[j].start < end + 1) {
 685                        range[j].start = end + 1;
 686                        continue;
 687                }
 688
 689
 690                if (start > range[j].start && end >= range[j].end &&
 691                    range[j].end > start - 1) {
 692                        range[j].end = start - 1;
 693                        continue;
 694                }
 695
 696                if (start > range[j].start && end < range[j].end) {
  697                        /* find a spare slot for the split-off upper part */
 698                        for (i = 0; i < RANGE_NUM; i++) {
 699                                if (range[i].end == 0)
 700                                        break;
 701                        }
 702                        if (i < RANGE_NUM) {
 703                                range[i].end = range[j].end;
 704                                range[i].start = end + 1;
 705                        } else {
  706                                printk(KERN_ERR "out of slots in ranges\n");
 707                        }
 708                        range[j].end = start - 1;
 709                        continue;
 710                }
 711        }
 712}
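     /*
      * For example, subtracting 0x140-0x17f from an entry covering 0x100-0x1ff
      * hits the last case above: the entry is trimmed to 0x100-0x13f and the
      * remainder 0x180-0x1ff is placed in a spare slot.
      */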
 713
 714static int __init cmp_range(const void *x1, const void *x2)
 715{
 716        const struct res_range *r1 = x1;
 717        const struct res_range *r2 = x2;
 718        long start1, start2;
 719
 720        start1 = r1->start;
 721        start2 = r2->start;
 722
  723        return (start1 > start2) - (start1 < start2); /* avoid truncating a long difference to int */
 724}
 725
 726struct var_mtrr_range_state {
 727        unsigned long base_pfn;
 728        unsigned long size_pfn;
 729        mtrr_type type;
 730};
 731
 732static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 733static int __initdata debug_print;
 734
 735static int __init
 736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 737                       unsigned long extra_remove_base,
 738                       unsigned long extra_remove_size)
 739{
 740        unsigned long i, base, size;
 741        mtrr_type type;
 742
 743        for (i = 0; i < num_var_ranges; i++) {
 744                type = range_state[i].type;
 745                if (type != MTRR_TYPE_WRBACK)
 746                        continue;
 747                base = range_state[i].base_pfn;
 748                size = range_state[i].size_pfn;
 749                nr_range = add_range_with_merge(range, nr_range, base,
 750                                                base + size - 1);
 751        }
 752        if (debug_print) {
 753                printk(KERN_DEBUG "After WB checking\n");
 754                for (i = 0; i < nr_range; i++)
 755                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
 756                                 range[i].start, range[i].end + 1);
 757        }
 758
 759        /* take out UC ranges */
 760        for (i = 0; i < num_var_ranges; i++) {
 761                type = range_state[i].type;
 762                if (type != MTRR_TYPE_UNCACHABLE &&
 763                    type != MTRR_TYPE_WRPROT)
 764                        continue;
 765                size = range_state[i].size_pfn;
 766                if (!size)
 767                        continue;
 768                base = range_state[i].base_pfn;
 769                subtract_range(range, base, base + size - 1);
 770        }
 771        if (extra_remove_size)
 772                subtract_range(range, extra_remove_base,
 773                                 extra_remove_base + extra_remove_size  - 1);
 774
 775        /* get new range num */
 776        nr_range = 0;
 777        for (i = 0; i < RANGE_NUM; i++) {
 778                if (!range[i].end)
 779                        continue;
 780                nr_range++;
 781        }
 782        if  (debug_print) {
 783                printk(KERN_DEBUG "After UC checking\n");
 784                for (i = 0; i < nr_range; i++)
 785                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
 786                                 range[i].start, range[i].end + 1);
 787        }
 788
 789        /* sort the ranges */
 790        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
 791        if  (debug_print) {
 792                printk(KERN_DEBUG "After sorting\n");
 793                for (i = 0; i < nr_range; i++)
 794                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
 795                                 range[i].start, range[i].end + 1);
 796        }
 797
  798        /* clear the entries that are not used */
 799        for (i = nr_range; i < RANGE_NUM; i++)
 800                memset(&range[i], 0, sizeof(range[i]));
 801
 802        return nr_range;
 803}
 804
 805static struct res_range __initdata range[RANGE_NUM];
 806static int __initdata nr_range;
 807
 808#ifdef CONFIG_MTRR_SANITIZER
 809
 810static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
 811{
 812        unsigned long sum;
 813        int i;
 814
 815        sum = 0;
 816        for (i = 0; i < nr_range; i++)
 817                sum += range[i].end + 1 - range[i].start;
 818
 819        return sum;
 820}
 821
 822static int enable_mtrr_cleanup __initdata =
 823        CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
 824
 825static int __init disable_mtrr_cleanup_setup(char *str)
 826{
 827        enable_mtrr_cleanup = 0;
 828        return 0;
 829}
 830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
 831
 832static int __init enable_mtrr_cleanup_setup(char *str)
 833{
 834        enable_mtrr_cleanup = 1;
 835        return 0;
 836}
 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 838
 839static int __init mtrr_cleanup_debug_setup(char *str)
 840{
 841        debug_print = 1;
 842        return 0;
 843}
 844early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
 845
 846struct var_mtrr_state {
 847        unsigned long   range_startk;
 848        unsigned long   range_sizek;
 849        unsigned long   chunk_sizek;
 850        unsigned long   gran_sizek;
 851        unsigned int    reg;
 852};
 853
 854static void __init
 855set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
 856                unsigned char type, unsigned int address_bits)
 857{
 858        u32 base_lo, base_hi, mask_lo, mask_hi;
 859        u64 base, mask;
 860
 861        if (!sizek) {
 862                fill_mtrr_var_range(reg, 0, 0, 0, 0);
 863                return;
 864        }
 865
 866        mask = (1ULL << address_bits) - 1;
 867        mask &= ~((((u64)sizek) << 10) - 1);
 868
 869        base  = ((u64)basek) << 10;
 870
 871        base |= type;
 872        mask |= 0x800;
 873
 874        base_lo = base & ((1ULL<<32) - 1);
 875        base_hi = base >> 32;
 876
 877        mask_lo = mask & ((1ULL<<32) - 1);
 878        mask_hi = mask >> 32;
 879
 880        fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 881}
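     /*
      * For example, with address_bits == 36, basek == 0 and sizek == 1048576
      * (a 1 GB WRBACK range), this computes base_lo/base_hi = 0x00000006/0x0
      * and mask_lo/mask_hi = 0xc0000800/0xf (the 0x800 bit marks the variable
      * range as valid).
      */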
 882
 883static void __init
 884save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
 885                unsigned char type)
 886{
 887        range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
 888        range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
 889        range_state[reg].type = type;
 890}
 891
 892static void __init
 893set_var_mtrr_all(unsigned int address_bits)
 894{
 895        unsigned long basek, sizek;
 896        unsigned char type;
 897        unsigned int reg;
 898
 899        for (reg = 0; reg < num_var_ranges; reg++) {
 900                basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
 901                sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
 902                type = range_state[reg].type;
 903
 904                set_var_mtrr(reg, basek, sizek, type, address_bits);
 905        }
 906}
 907
 908static unsigned long to_size_factor(unsigned long sizek, char *factorp)
 909{
 910        char factor;
 911        unsigned long base = sizek;
 912
 913        if (base & ((1<<10) - 1)) {
 914                /* not MB alignment */
 915                factor = 'K';
 916        } else if (base & ((1<<20) - 1)){
 917                factor = 'M';
 918                base >>= 10;
 919        } else {
 920                factor = 'G';
 921                base >>= 20;
 922        }
 923
 924        *factorp = factor;
 925
 926        return base;
 927}
 928
 929static unsigned int __init
 930range_to_mtrr(unsigned int reg, unsigned long range_startk,
 931              unsigned long range_sizek, unsigned char type)
 932{
 933        if (!range_sizek || (reg >= num_var_ranges))
 934                return reg;
 935
 936        while (range_sizek) {
 937                unsigned long max_align, align;
 938                unsigned long sizek;
 939
 940                /* Compute the maximum size I can make a range */
 941                if (range_startk)
 942                        max_align = ffs(range_startk) - 1;
 943                else
 944                        max_align = 32;
 945                align = fls(range_sizek) - 1;
 946                if (align > max_align)
 947                        align = max_align;
 948
 949                sizek = 1 << align;
 950                if (debug_print) {
 951                        char start_factor = 'K', size_factor = 'K';
 952                        unsigned long start_base, size_base;
 953
  954                        start_base = to_size_factor(range_startk, &start_factor);
  955                        size_base = to_size_factor(sizek, &size_factor);
 956
 957                        printk(KERN_DEBUG "Setting variable MTRR %d, "
 958                                "base: %ld%cB, range: %ld%cB, type %s\n",
 959                                reg, start_base, start_factor,
 960                                size_base, size_factor,
 961                                (type == MTRR_TYPE_UNCACHABLE)?"UC":
 962                                    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 963                                );
 964                }
 965                save_var_mtrr(reg++, range_startk, sizek, type);
 966                range_startk += sizek;
 967                range_sizek -= sizek;
 968                if (reg >= num_var_ranges)
 969                        break;
 970        }
 971        return reg;
 972}
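     /*
      * For example, a 3 MB range starting at 1 MB (range_startk = 1024,
      * range_sizek = 3072) is covered with two variable MTRRs: a 1 MB block
      * at 1 MB (limited by the start alignment) and a 2 MB block at 2 MB.
      */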
 973
 974static unsigned __init
 975range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
 976                        unsigned long sizek)
 977{
 978        unsigned long hole_basek, hole_sizek;
 979        unsigned long second_basek, second_sizek;
 980        unsigned long range0_basek, range0_sizek;
 981        unsigned long range_basek, range_sizek;
 982        unsigned long chunk_sizek;
 983        unsigned long gran_sizek;
 984
 985        hole_basek = 0;
 986        hole_sizek = 0;
 987        second_basek = 0;
 988        second_sizek = 0;
 989        chunk_sizek = state->chunk_sizek;
 990        gran_sizek = state->gran_sizek;
 991
  992        /* align to gran size to prevent small blocks from using up MTRRs */
 993        range_basek = ALIGN(state->range_startk, gran_sizek);
 994        if ((range_basek > basek) && basek)
 995                return second_sizek;
 996        state->range_sizek -= (range_basek - state->range_startk);
 997        range_sizek = ALIGN(state->range_sizek, gran_sizek);
 998
 999        while (range_sizek > state->range_sizek) {
1000                range_sizek -= gran_sizek;
1001                if (!range_sizek)
1002                        return 0;
1003        }
1004        state->range_sizek = range_sizek;
1005
1006        /* try to append some small hole */
1007        range0_basek = state->range_startk;
1008        range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
1009
1010        /* no increase */
1011        if (range0_sizek == state->range_sizek) {
1012                if (debug_print)
1013                        printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
1014                                range0_basek<<10,
1015                                (range0_basek + state->range_sizek)<<10);
1016                state->reg = range_to_mtrr(state->reg, range0_basek,
1017                                state->range_sizek, MTRR_TYPE_WRBACK);
1018                return 0;
1019        }
1020
 1021        /* only cut back when this is not the last range */
1022        if (sizek) {
1023                while (range0_basek + range0_sizek > (basek + sizek)) {
1024                        if (range0_sizek >= chunk_sizek)
1025                                range0_sizek -= chunk_sizek;
1026                        else
1027                                range0_sizek = 0;
1028
1029                        if (!range0_sizek)
1030                                break;
1031                }
1032        }
1033
1034second_try:
1035        range_basek = range0_basek + range0_sizek;
1036
1037        /* one hole in the middle */
1038        if (range_basek > basek && range_basek <= (basek + sizek))
1039                second_sizek = range_basek - basek;
1040
1041        if (range0_sizek > state->range_sizek) {
1042
1043                /* one hole in middle or at end */
1044                hole_sizek = range0_sizek - state->range_sizek - second_sizek;
1045
1046                /* hole size should be less than half of range0 size */
1047                if (hole_sizek >= (range0_sizek >> 1) &&
1048                    range0_sizek >= chunk_sizek) {
1049                        range0_sizek -= chunk_sizek;
1050                        second_sizek = 0;
1051                        hole_sizek = 0;
1052
1053                        goto second_try;
1054                }
1055        }
1056
1057        if (range0_sizek) {
1058                if (debug_print)
1059                        printk(KERN_DEBUG "range0: %016lx - %016lx\n",
1060                                range0_basek<<10,
1061                                (range0_basek + range0_sizek)<<10);
1062                state->reg = range_to_mtrr(state->reg, range0_basek,
1063                                range0_sizek, MTRR_TYPE_WRBACK);
1064        }
1065
1066        if (range0_sizek < state->range_sizek) {
 1067                /* handle the leftover part */
1068                range_sizek = state->range_sizek - range0_sizek;
1069
1070                if (debug_print)
1071                        printk(KERN_DEBUG "range: %016lx - %016lx\n",
1072                                 range_basek<<10,
1073                                 (range_basek + range_sizek)<<10);
1074                state->reg = range_to_mtrr(state->reg, range_basek,
1075                                 range_sizek, MTRR_TYPE_WRBACK);
1076        }
1077
1078        if (hole_sizek) {
1079                hole_basek = range_basek - hole_sizek - second_sizek;
1080                if (debug_print)
1081                        printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1082                                 hole_basek<<10,
1083                                 (hole_basek + hole_sizek)<<10);
1084                state->reg = range_to_mtrr(state->reg, hole_basek,
1085                                 hole_sizek, MTRR_TYPE_UNCACHABLE);
1086        }
1087
1088        return second_sizek;
1089}
1090
1091static void __init
1092set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1093                   unsigned long size_pfn)
1094{
1095        unsigned long basek, sizek;
1096        unsigned long second_sizek = 0;
1097
1098        if (state->reg >= num_var_ranges)
1099                return;
1100
1101        basek = base_pfn << (PAGE_SHIFT - 10);
1102        sizek = size_pfn << (PAGE_SHIFT - 10);
1103
1104        /* See if I can merge with the last range */
1105        if ((basek <= 1024) ||
1106            (state->range_startk + state->range_sizek == basek)) {
1107                unsigned long endk = basek + sizek;
1108                state->range_sizek = endk - state->range_startk;
1109                return;
1110        }
1111        /* Write the range mtrrs */
1112        if (state->range_sizek != 0)
1113                second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1114
1115        /* Allocate an msr */
1116        state->range_startk = basek + second_sizek;
1117        state->range_sizek  = sizek - second_sizek;
1118}
1119
 1120/* minimum size of an mtrr block that can take a hole */
1121static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1122
1123static int __init parse_mtrr_chunk_size_opt(char *p)
1124{
1125        if (!p)
1126                return -EINVAL;
1127        mtrr_chunk_size = memparse(p, &p);
1128        return 0;
1129}
1130early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1131
 1132/* granularity of an mtrr block */
1133static u64 mtrr_gran_size __initdata;
1134
1135static int __init parse_mtrr_gran_size_opt(char *p)
1136{
1137        if (!p)
1138                return -EINVAL;
1139        mtrr_gran_size = memparse(p, &p);
1140        return 0;
1141}
1142early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1143
1144static int nr_mtrr_spare_reg __initdata =
1145                                 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1146
1147static int __init parse_mtrr_spare_reg(char *arg)
1148{
1149        if (arg)
1150                nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1151        return 0;
1152}
1153
1154early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
1155
1156static int __init
1157x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1158                    u64 chunk_size, u64 gran_size)
1159{
1160        struct var_mtrr_state var_state;
1161        int i;
1162        int num_reg;
1163
1164        var_state.range_startk  = 0;
1165        var_state.range_sizek   = 0;
1166        var_state.reg           = 0;
1167        var_state.chunk_sizek   = chunk_size >> 10;
1168        var_state.gran_sizek    = gran_size >> 10;
1169
1170        memset(range_state, 0, sizeof(range_state));
1171
1172        /* Write the range etc */
1173        for (i = 0; i < nr_range; i++)
1174                set_var_mtrr_range(&var_state, range[i].start,
1175                                   range[i].end - range[i].start + 1);
1176
1177        /* Write the last range */
1178        if (var_state.range_sizek != 0)
1179                range_to_mtrr_with_hole(&var_state, 0, 0);
1180
1181        num_reg = var_state.reg;
 1182        /* Clear out the extra MTRRs */
1183        while (var_state.reg < num_var_ranges) {
1184                save_var_mtrr(var_state.reg, 0, 0, 0);
1185                var_state.reg++;
1186        }
1187
1188        return num_reg;
1189}
1190
1191struct mtrr_cleanup_result {
1192        unsigned long gran_sizek;
1193        unsigned long chunk_sizek;
1194        unsigned long lose_cover_sizek;
1195        unsigned int num_reg;
1196        int bad;
1197};
1198
1199/*
1200 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
1201 * chunk size: gran_size, ..., 2G
1202 * so we need (1+16)*8
1203 */
1204#define NUM_RESULT      136
1205#define PSHIFT          (PAGE_SHIFT - 10)
1206
1207static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1208static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1209
1210static void __init print_out_mtrr_range_state(void)
1211{
1212        int i;
1213        char start_factor = 'K', size_factor = 'K';
1214        unsigned long start_base, size_base;
1215        mtrr_type type;
1216
1217        for (i = 0; i < num_var_ranges; i++) {
1218
1219                size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1220                if (!size_base)
1221                        continue;
1222
 1223                size_base = to_size_factor(size_base, &size_factor);
 1224                start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
 1225                start_base = to_size_factor(start_base, &start_factor);
1226                type = range_state[i].type;
1227
1228                printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1229                        i, start_base, start_factor,
1230                        size_base, size_factor,
1231                        (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1232                            ((type == MTRR_TYPE_WRPROT) ? "WP" :
1233                             ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1234                        );
1235        }
1236}
1237
1238static int __init mtrr_need_cleanup(void)
1239{
1240        int i;
1241        mtrr_type type;
1242        unsigned long size;
1243        /* extra one for all 0 */
1244        int num[MTRR_NUM_TYPES + 1];
1245
 1246        /* count the entries of each type */
1247        memset(num, 0, sizeof(num));
1248        for (i = 0; i < num_var_ranges; i++) {
1249                type = range_state[i].type;
1250                size = range_state[i].size_pfn;
1251                if (type >= MTRR_NUM_TYPES)
1252                        continue;
1253                if (!size)
1254                        type = MTRR_NUM_TYPES;
1255                if (type == MTRR_TYPE_WRPROT)
1256                        type = MTRR_TYPE_UNCACHABLE;
1257                num[type]++;
1258        }
1259
1260        /* check if we got UC entries */
1261        if (!num[MTRR_TYPE_UNCACHABLE])
1262                return 0;
1263
1264        /* check if we only had WB and UC */
1265        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1266                num_var_ranges - num[MTRR_NUM_TYPES])
1267                return 0;
1268
1269        return 1;
1270}
1271
1272static unsigned long __initdata range_sums;
1273static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
1274                                         unsigned long extra_remove_base,
1275                                         unsigned long extra_remove_size,
1276                                         int i)
1277{
1278        int num_reg;
1279        static struct res_range range_new[RANGE_NUM];
1280        static int nr_range_new;
1281        unsigned long range_sums_new;
1282
1283        /* convert ranges to var ranges state */
1284        num_reg = x86_setup_var_mtrrs(range, nr_range,
1285                                                chunk_size, gran_size);
1286
1287        /* we got new setting in range_state, check it */
1288        memset(range_new, 0, sizeof(range_new));
1289        nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1290                                extra_remove_base, extra_remove_size);
1291        range_sums_new = sum_ranges(range_new, nr_range_new);
1292
1293        result[i].chunk_sizek = chunk_size >> 10;
1294        result[i].gran_sizek = gran_size >> 10;
1295        result[i].num_reg = num_reg;
1296        if (range_sums < range_sums_new) {
1297                result[i].lose_cover_sizek =
1298                        (range_sums_new - range_sums) << PSHIFT;
1299                result[i].bad = 1;
1300        } else
1301                result[i].lose_cover_sizek =
1302                        (range_sums - range_sums_new) << PSHIFT;
1303
1304        /* double check it */
1305        if (!result[i].bad && !result[i].lose_cover_sizek) {
1306                if (nr_range_new != nr_range ||
1307                        memcmp(range, range_new, sizeof(range)))
1308                                result[i].bad = 1;
1309        }
1310
1311        if (!result[i].bad && (range_sums - range_sums_new <
1312                                min_loss_pfn[num_reg])) {
1313                min_loss_pfn[num_reg] =
1314                        range_sums - range_sums_new;
1315        }
1316}
1317
1318static void __init mtrr_print_out_one_result(int i)
1319{
1320        char gran_factor, chunk_factor, lose_factor;
1321        unsigned long gran_base, chunk_base, lose_base;
1322
 1323        gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
 1324        chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
 1325        lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);
1326        printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1327                        result[i].bad ? "*BAD*" : " ",
1328                        gran_base, gran_factor, chunk_base, chunk_factor);
1329        printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
1330                        result[i].num_reg, result[i].bad ? "-" : "",
1331                        lose_base, lose_factor);
1332}
1333
1334static int __init mtrr_search_optimal_index(void)
1335{
1336        int i;
1337        int num_reg_good;
1338        int index_good;
1339
1340        if (nr_mtrr_spare_reg >= num_var_ranges)
1341                nr_mtrr_spare_reg = num_var_ranges - 1;
1342        num_reg_good = -1;
1343        for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1344                if (!min_loss_pfn[i])
1345                        num_reg_good = i;
1346        }
1347
1348        index_good = -1;
1349        if (num_reg_good != -1) {
1350                for (i = 0; i < NUM_RESULT; i++) {
1351                        if (!result[i].bad &&
1352                            result[i].num_reg == num_reg_good &&
1353                            !result[i].lose_cover_sizek) {
1354                                index_good = i;
1355                                break;
1356                        }
1357                }
1358        }
1359
1360        return index_good;
1361}
1362
1363
1364static int __init mtrr_cleanup(unsigned address_bits)
1365{
1366        unsigned long extra_remove_base, extra_remove_size;
1367        unsigned long base, size, def, dummy;
1368        mtrr_type type;
1369        u64 chunk_size, gran_size;
1370        int index_good;
1371        int i;
1372
1373        if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1374                return 0;
1375        rdmsr(MTRRdefType_MSR, def, dummy);
1376        def &= 0xff;
1377        if (def != MTRR_TYPE_UNCACHABLE)
1378                return 0;
1379
1380        /* get it and store it aside */
1381        memset(range_state, 0, sizeof(range_state));
1382        for (i = 0; i < num_var_ranges; i++) {
1383                mtrr_if->get(i, &base, &size, &type);
1384                range_state[i].base_pfn = base;
1385                range_state[i].size_pfn = size;
1386                range_state[i].type = type;
1387        }
1388
 1389        /* check whether we need to handle it and can handle it */
1390        if (!mtrr_need_cleanup())
1391                return 0;
1392
1393        /* print original var MTRRs at first, for debugging: */
1394        printk(KERN_DEBUG "original variable MTRRs\n");
1395        print_out_mtrr_range_state();
1396
1397        memset(range, 0, sizeof(range));
1398        extra_remove_size = 0;
1399        extra_remove_base = 1 << (32 - PAGE_SHIFT);
1400        if (mtrr_tom2)
1401                extra_remove_size =
1402                        (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1403        nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1404                                          extra_remove_size);
1405        /*
 1406         * [0, 1M) should always be covered by var mtrr with WB
 1407         * and fixed mtrrs should take effect before var mtrrs for it
1408         */
1409        nr_range = add_range_with_merge(range, nr_range, 0,
1410                                        (1ULL<<(20 - PAGE_SHIFT)) - 1);
1411        /* sort the ranges */
1412        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
1413
1414        range_sums = sum_ranges(range, nr_range);
 1415        printk(KERN_INFO "total RAM covered: %ldM\n",
1416               range_sums >> (20 - PAGE_SHIFT));
1417
1418        if (mtrr_chunk_size && mtrr_gran_size) {
1419                i = 0;
1420                mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
1421                                      extra_remove_base, extra_remove_size, i);
1422
1423                mtrr_print_out_one_result(i);
1424
1425                if (!result[i].bad) {
1426                        set_var_mtrr_all(address_bits);
1427                        return 1;
1428                }
1429                printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1430                       "will find optimal one\n");
1431        }
1432
1433        i = 0;
1434        memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1435        memset(result, 0, sizeof(result));
1436        for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1437
1438                for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1439                     chunk_size <<= 1) {
1440
1441                        if (i >= NUM_RESULT)
1442                                continue;
1443
1444                        mtrr_calc_range_state(chunk_size, gran_size,
1445                                      extra_remove_base, extra_remove_size, i);
1446                        if (debug_print) {
1447                                mtrr_print_out_one_result(i);
1448                                printk(KERN_INFO "\n");
1449                        }
1450
1451                        i++;
1452                }
1453        }
1454
1455        /* try to find the optimal index */
1456        index_good = mtrr_search_optimal_index();
1457
1458        if (index_good != -1) {
1459                printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1460                i = index_good;
1461                mtrr_print_out_one_result(i);
1462
1463                /* convert ranges to var ranges state */
1464                chunk_size = result[i].chunk_sizek;
1465                chunk_size <<= 10;
1466                gran_size = result[i].gran_sizek;
1467                gran_size <<= 10;
1468                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1469                set_var_mtrr_all(address_bits);
1470                printk(KERN_DEBUG "New variable MTRRs\n");
1471                print_out_mtrr_range_state();
1472                return 1;
1473        } else {
1474                /* print out all */
1475                for (i = 0; i < NUM_RESULT; i++)
1476                        mtrr_print_out_one_result(i);
1477        }
1478
1479        printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
1480        printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1481
1482        return 0;
1483}
1484#else
1485static int __init mtrr_cleanup(unsigned address_bits)
1486{
1487        return 0;
1488}
1489#endif
1490
1491static int __initdata changed_by_mtrr_cleanup;
1492
1493static int disable_mtrr_trim;
1494
1495static int __init disable_mtrr_trim_setup(char *str)
1496{
1497        disable_mtrr_trim = 1;
1498        return 0;
1499}
1500early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
1501
1502/*
1503 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
1504 * for memory >4GB. Check for that here.
1505 * Note this won't check whether the MTRRs below 4GB (where the magic bit
1506 * doesn't apply) are wrong, but so far we don't know of any such case in the wild.
1507 */
1508#define Tom2Enabled (1U << 21)
1509#define Tom2ForceMemTypeWB (1U << 22)
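    /*
     * Bits 21 and 22 of MSR_K8_SYSCFG: bit 21 enables the TOM2 (top of
     * memory 2) register, bit 22 forces write-back for the 4GB..TOM2 range.
     */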
1510
1511int __init amd_special_default_mtrr(void)
1512{
1513        u32 l, h;
1514
1515        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1516                return 0;
1517        if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
1518                return 0;
1519        /* In case some hypervisor doesn't pass SYSCFG through */
1520        if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
1521                return 0;
1522        /*
1523         * Memory between 4GB and top of mem is forced WB by this magic bit.
1524         * Reserved before K8RevF, but should be zero there.
1525         */
1526        if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
1527                 (Tom2Enabled | Tom2ForceMemTypeWB))
1528                return 1;
1529        return 0;
1530}
1531
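    /*
     * Re-mark the page-frame range [start_pfn, limit_pfn) as reserved instead
     * of RAM in the e820 map; returns the number of bytes actually updated.
     */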
1532static u64 __init real_trim_memory(unsigned long start_pfn,
1533                                   unsigned long limit_pfn)
1534{
1535        u64 trim_start, trim_size;
1536        trim_start = start_pfn;
1537        trim_start <<= PAGE_SHIFT;
1538        trim_size = limit_pfn;
1539        trim_size <<= PAGE_SHIFT;
1540        trim_size -= trim_start;
1541
1542        return e820_update_range(trim_start, trim_size, E820_RAM,
1543                                E820_RESERVED);
1544}
1545/**
1546 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
1547 * @end_pfn: ending page frame number
1548 *
1549 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
1550 * memory configurations.  This routine checks that the highest MTRR matches
1551 * the end of memory, to make sure the MTRRs with a write-back type cover
1552 * all of the memory the kernel intends to use.  If not, it trims the
1553 * uncovered memory off the end by marking it reserved in the e820 map,
1554 * removing it from the kernel's allocation pools and warning the user.
1555 */
1556int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1557{
1558        unsigned long i, base, size, highest_pfn = 0, def, dummy;
1559        mtrr_type type;
1560        u64 total_trim_size;
1561
1562        /* one extra slot to count entries with size 0 */
1563        int num[MTRR_NUM_TYPES + 1];
1564        /*
1565         * Make sure we only trim uncachable memory on machines that
1566         * support the Intel MTRR architecture:
1567         */
1568        if (!is_cpu(INTEL) || disable_mtrr_trim)
1569                return 0;
1570        rdmsr(MTRRdefType_MSR, def, dummy);
1571        def &= 0xff;
1572        if (def != MTRR_TYPE_UNCACHABLE)
1573                return 0;
1574
1575        /* get it and store it aside */
1576        memset(range_state, 0, sizeof(range_state));
1577        for (i = 0; i < num_var_ranges; i++) {
1578                mtrr_if->get(i, &base, &size, &type);
1579                range_state[i].base_pfn = base;
1580                range_state[i].size_pfn = size;
1581                range_state[i].type = type;
1582        }
1583
1584        /* Find highest cached pfn */
1585        for (i = 0; i < num_var_ranges; i++) {
1586                type = range_state[i].type;
1587                if (type != MTRR_TYPE_WRBACK)
1588                        continue;
1589                base = range_state[i].base_pfn;
1590                size = range_state[i].size_pfn;
1591                if (highest_pfn < base + size)
1592                        highest_pfn = base + size;
1593        }
1594
1595        /* kvm/qemu don't set the MTRRs up properly; don't trim all of memory away */
1596        if (!highest_pfn) {
1597                printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1598                return 0;
1599        }
1600
1601        /* count the number of entries of each type */
1602        memset(num, 0, sizeof(num));
1603        for (i = 0; i < num_var_ranges; i++) {
1604                type = range_state[i].type;
1605                if (type >= MTRR_NUM_TYPES)
1606                        continue;
1607                size = range_state[i].size_pfn;
1608                if (!size)
1609                        type = MTRR_NUM_TYPES;
1610                num[type]++;
1611        }
1612
1613        /* no entry for WB? */
1614        if (!num[MTRR_TYPE_WRBACK])
1615                return 0;
1616
1617        /* bail out if any non-empty entry has a type other than WB or UC */
1618        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1619                num_var_ranges - num[MTRR_NUM_TYPES])
1620                return 0;
1621
1622        memset(range, 0, sizeof(range));
1623        nr_range = 0;
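            /*
             * If the AMD TOM2 magic is in effect, everything from 4GB up to
             * mtrr_tom2 is write-back regardless of the variable MTRRs, so
             * account for it as an extra covered range.
             */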
1624        if (mtrr_tom2) {
1625                range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1626                range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1627                if (highest_pfn < range[nr_range].end + 1)
1628                        highest_pfn = range[nr_range].end + 1;
1629                nr_range++;
1630        }
1631        nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1632
1633        total_trim_size = 0;
1634        /* check the head */
1635        if (range[0].start)
1636                total_trim_size += real_trim_memory(0, range[0].start);
1637        /* check the holes */
1638        for (i = 0; i < nr_range - 1; i++) {
1639                if (range[i].end + 1 < range[i+1].start)
1640                        total_trim_size += real_trim_memory(range[i].end + 1,
1641                                                            range[i+1].start);
1642        }
1643        /* check the top */
1644        i = nr_range - 1;
1645        if (range[i].end + 1 < end_pfn)
1646                total_trim_size += real_trim_memory(range[i].end + 1,
1647                                                         end_pfn);
1648
1649        if (total_trim_size) {
1650                printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1651                        " all of memory, losing %lluMB of RAM.\n",
1652                        total_trim_size >> 20);
1653
1654                if (!changed_by_mtrr_cleanup)
1655                        WARN_ON(1);
1656
1657                printk(KERN_INFO "update e820 for mtrr\n");
1658                update_e820();
1659
1660                return 1;
1661        }
1662
1663        return 0;
1664}
1665
1666/**
1667 * mtrr_bp_init - initialize mtrrs on the boot CPU
1668 *
1669 * This needs to be called early, before any of the other CPUs are
1670 * initialized (i.e. before smp_init()).
1671 * 
1672 */
1673void __init mtrr_bp_init(void)
1674{
1675        u32 phys_addr;
1676        init_ifs();
1677
1678        phys_addr = 32;
1679
1680        if (cpu_has_mtrr) {
1681                mtrr_if = &generic_mtrr_ops;
1682                size_or_mask = 0xff000000;      /* 36 bits */
1683                size_and_mask = 0x00f00000;
1684                phys_addr = 36;
1685
1686                /* This is an AMD-specific CPUID leaf, but we assume (hope?)
1687                   that Intel will implement it too when they extend the
1688                   address bus of the Xeon. */
1689                if (cpuid_eax(0x80000000) >= 0x80000008) {
1690                        phys_addr = cpuid_eax(0x80000008) & 0xff;
1691                        /* CPUID workaround for Intel 0F33/0F34 CPU */
1692                        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1693                            boot_cpu_data.x86 == 0xF &&
1694                            boot_cpu_data.x86_model == 0x3 &&
1695                            (boot_cpu_data.x86_mask == 0x3 ||
1696                             boot_cpu_data.x86_mask == 0x4))
1697                                phys_addr = 36;
1698
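                            /*
                             * Sizes/bases are kept at page-frame granularity, so
                             * only (phys_addr - PAGE_SHIFT) bits are meaningful:
                             * size_or_mask sets every bit above that range, and
                             * size_and_mask keeps the valid page-frame bits from
                             * bit 20 up, i.e. the address bits above 4GB
                             * (0x00f00000 for the default 36-bit case above).
                             */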
1699                        size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
1700                        size_and_mask = ~size_or_mask & 0xfffff00000ULL;
1701                } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
1702                           boot_cpu_data.x86 == 6) {
1703                        /* The VIA C* family has Intel-style MTRRs, but
1704                           doesn't support PAE */
1705                        size_or_mask = 0xfff00000;      /* 32 bits */
1706                        size_and_mask = 0;
1707                        phys_addr = 32;
1708                }
1709        } else {
1710                switch (boot_cpu_data.x86_vendor) {
1711                case X86_VENDOR_AMD:
1712                        if (cpu_has_k6_mtrr) {
1713                                /* Pre-Athlon (K6) AMD CPU MTRRs */
1714                                mtrr_if = mtrr_ops[X86_VENDOR_AMD];
1715                                size_or_mask = 0xfff00000;      /* 32 bits */
1716                                size_and_mask = 0;
1717                        }
1718                        break;
1719                case X86_VENDOR_CENTAUR:
1720                        if (cpu_has_centaur_mcr) {
1721                                mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
1722                                size_or_mask = 0xfff00000;      /* 32 bits */
1723                                size_and_mask = 0;
1724                        }
1725                        break;
1726                case X86_VENDOR_CYRIX:
1727                        if (cpu_has_cyrix_arr) {
1728                                mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
1729                                size_or_mask = 0xfff00000;      /* 32 bits */
1730                                size_and_mask = 0;
1731                        }
1732                        break;
1733                default:
1734                        break;
1735                }
1736        }
1737
1738        if (mtrr_if) {
1739                set_num_var_ranges();
1740                init_table();
1741                if (use_intel()) {
1742                        get_mtrr_state();
1743
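                            /*
                             * If the cleanup produced a new variable-MTRR layout,
                             * program it into the hardware on the boot CPU; the
                             * APs copy it later via mtrr_ap_init().
                             */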
1744                        if (mtrr_cleanup(phys_addr)) {
1745                                changed_by_mtrr_cleanup = 1;
1746                                mtrr_if->set_all();
1747                        }
1748
1749                }
1750        }
1751}
1752
1753void mtrr_ap_init(void)
1754{
1755        unsigned long flags;
1756
1757        if (!mtrr_if || !use_intel())
1758                return;
1759        /*
1760         * Ideally we should hold mtrr_mutex here to avoid MTRR entries being
1761         * changed, but this routine is called at CPU boot time and holding the
1762         * lock would break that. This routine is called in two cases: 1. very
1763         * early during software resume, when there absolutely are no MTRR entry
1764         * changes; 2. at CPU hot-add time. We let mtrr_add/del_page hold the
1765         * cpuhotplug lock to prevent MTRR entry changes.
1766         */
1767        local_irq_save(flags);
1768
1769        mtrr_if->set_all();
1770
1771        local_irq_restore(flags);
1772}
1773
1774/**
1775 * mtrr_save_state - Save current fixed-range MTRR state of the BSP
1776 */
1777void mtrr_save_state(void)
1778{
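            /* Run mtrr_save_fixed_ranges() on CPU 0 (the BSP) and wait for it. */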
1779        smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
1780}
1781
1782static int __init mtrr_init_finialize(void)
1783{
1784        if (!mtrr_if)
1785                return 0;
1786        if (use_intel()) {
1787                if (!changed_by_mtrr_cleanup)
1788                        mtrr_state_warn();
1789        } else {
1790                /* These CPUs don't have MTRRs and seem not to support SMP.
1791                 * They have their own specific drivers, so we use a tricky
1792                 * method to support suspend/resume for them.
1793                 * TBD: is there any system with such a CPU that supports
1794                 * suspend/resume?  If not, we should remove this code.
1795                 */
1796                sysdev_driver_register(&cpu_sysdev_class,
1797                        &mtrr_sysdev_driver);
1798        }
1799        return 0;
1800}
1801subsys_initcall(mtrr_init_finialize);
1802