/*
 * linux/arch/x86/kernel/e820.c
 *
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
  11#include <linux/kernel.h>
  12#include <linux/types.h>
  13#include <linux/init.h>
  14#include <linux/bootmem.h>
  15#include <linux/ioport.h>
  16#include <linux/string.h>
  17#include <linux/kexec.h>
  18#include <linux/module.h>
  19#include <linux/mm.h>
  20#include <linux/pfn.h>
  21#include <linux/suspend.h>
  22#include <linux/firmware-map.h>
  23
  24#include <asm/pgtable.h>
  25#include <asm/page.h>
  26#include <asm/e820.h>
  27#include <asm/proto.h>
  28#include <asm/setup.h>
  29#include <asm/trampoline.h>
  30
  31/*
  32 * The e820 map is the map that gets modified e.g. with command line parameters
  33 * and that is also registered with modifications in the kernel resource tree
  34 * with the iomem_resource as parent.
  35 *
  36 * The e820_saved is directly saved after the BIOS-provided memory map is
  37 * copied. It doesn't get modified afterwards. It's registered for the
  38 * /sys/firmware/memmap interface.
  39 *
  40 * That memory map is not modified and is used as base for kexec. The kexec'd
  41 * kernel should get the same memory map as the firmware provides. Then the
  42 * user can e.g. boot the original kernel with mem=1G while still booting the
  43 * next kernel with full memory.
  44 */
  45struct e820map e820;
  46struct e820map e820_saved;
  47
  48/* For PCI or other memory-mapped resources */
  49unsigned long pci_mem_start = 0xaeedbabe;
  50#ifdef CONFIG_PCI
  51EXPORT_SYMBOL(pci_mem_start);
  52#endif
  53
  54/*
  55 * This function checks if any part of the range <start,end> is mapped
  56 * with type.
  57 */
  58int
  59e820_any_mapped(u64 start, u64 end, unsigned type)
  60{
  61        int i;
  62
  63        for (i = 0; i < e820.nr_map; i++) {
  64                struct e820entry *ei = &e820.map[i];
  65
  66                if (type && ei->type != type)
  67                        continue;
  68                if (ei->addr >= end || ei->addr + ei->size <= start)
  69                        continue;
  70                return 1;
  71        }
  72        return 0;
  73}
  74EXPORT_SYMBOL_GPL(e820_any_mapped);
  75
  76/*
  77 * This function checks if the entire range <start,end> is mapped with type.
  78 *
  79 * Note: this function only works correct if the e820 table is sorted and
  80 * not-overlapping, which is the case
  81 */
  82int __init e820_all_mapped(u64 start, u64 end, unsigned type)
  83{
  84        int i;
  85
  86        for (i = 0; i < e820.nr_map; i++) {
  87                struct e820entry *ei = &e820.map[i];
  88
  89                if (type && ei->type != type)
  90                        continue;
  91                /* is the region (part) in overlap with the current region ?*/
  92                if (ei->addr >= end || ei->addr + ei->size <= start)
  93                        continue;
  94
  95                /* if the region is at the beginning of <start,end> we move
  96                 * start to the end of the region since it's ok until there
  97                 */
  98                if (ei->addr <= start)
  99                        start = ei->addr + ei->size;
 100                /*
 101                 * if start is now at or beyond end, we're done, full
 102                 * coverage
 103                 */
 104                if (start >= end)
 105                        return 1;
 106        }
 107        return 0;
 108}
 109
 110/*
 111 * Add a memory region to the kernel e820 map.
 112 */
 113void __init e820_add_region(u64 start, u64 size, int type)
 114{
 115        int x = e820.nr_map;
 116
 117        if (x == ARRAY_SIZE(e820.map)) {
 118                printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 119                return;
 120        }
 121
 122        e820.map[x].addr = start;
 123        e820.map[x].size = size;
 124        e820.map[x].type = type;
 125        e820.nr_map++;
 126}
 127
 128void __init e820_print_map(char *who)
 129{
 130        int i;
 131
 132        for (i = 0; i < e820.nr_map; i++) {
 133                printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
 134                       (unsigned long long) e820.map[i].addr,
 135                       (unsigned long long)
 136                       (e820.map[i].addr + e820.map[i].size));
 137                switch (e820.map[i].type) {
 138                case E820_RAM:
 139                case E820_RESERVED_KERN:
 140                        printk(KERN_CONT "(usable)\n");
 141                        break;
 142                case E820_RESERVED:
 143                        printk(KERN_CONT "(reserved)\n");
 144                        break;
 145                case E820_ACPI:
 146                        printk(KERN_CONT "(ACPI data)\n");
 147                        break;
 148                case E820_NVS:
 149                        printk(KERN_CONT "(ACPI NVS)\n");
 150                        break;
 151                case E820_UNUSABLE:
 152                        printk("(unusable)\n");
 153                        break;
 154                default:
 155                        printk(KERN_CONT "type %u\n", e820.map[i].type);
 156                        break;
 157                }
 158        }
 159}
 160
 161/*
 162 * Sanitize the BIOS e820 map.
 163 *
 164 * Some e820 responses include overlapping entries. The following
 165 * replaces the original e820 map with a new one, removing overlaps,
 166 * and resolving conflicting memory types in favor of highest
 167 * numbered type.
 168 *
 169 * The input parameter biosmap points to an array of 'struct
 170 * e820entry' which on entry has elements in the range [0, *pnr_map)
 171 * valid, and which has space for up to max_nr_map entries.
 172 * On return, the resulting sanitized e820 map entries will be in
 173 * overwritten in the same location, starting at biosmap.
 174 *
 175 * The integer pointed to by pnr_map must be valid on entry (the
 176 * current number of valid entries located at biosmap) and will
 177 * be updated on return, with the new number of valid entries
 178 * (something no more than max_nr_map.)
 179 *
 180 * The return value from sanitize_e820_map() is zero if it
 181 * successfully 'sanitized' the map entries passed in, and is -1
 182 * if it did nothing, which can happen if either of (1) it was
 183 * only passed one map entry, or (2) any of the input map entries
 184 * were invalid (start + size < start, meaning that the size was
 185 * so big the described memory range wrapped around through zero.)
 186 *
 187 *      Visually we're performing the following
 188 *      (1,2,3,4 = memory types)...
 189 *
 190 *      Sample memory map (w/overlaps):
 191 *         ____22__________________
 192 *         ______________________4_
 193 *         ____1111________________
 194 *         _44_____________________
 195 *         11111111________________
 196 *         ____________________33__
 197 *         ___________44___________
 198 *         __________33333_________
 199 *         ______________22________
 200 *         ___________________2222_
 201 *         _________111111111______
 202 *         _____________________11_
 203 *         _________________4______
 204 *
 205 *      Sanitized equivalent (no overlap):
 206 *         1_______________________
 207 *         _44_____________________
 208 *         ___1____________________
 209 *         ____22__________________
 210 *         ______11________________
 211 *         _________1______________
 212 *         __________3_____________
 213 *         ___________44___________
 214 *         _____________33_________
 215 *         _______________2________
 216 *         ________________1_______
 217 *         _________________4______
 218 *         ___________________2____
 219 *         ____________________33__
 220 *         ______________________4_
 221 */
 222
 223int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 224                                int *pnr_map)
 225{
 226        struct change_member {
 227                struct e820entry *pbios; /* pointer to original bios entry */
 228                unsigned long long addr; /* address for this change point */
 229        };
 230        static struct change_member change_point_list[2*E820_X_MAX] __initdata;
 231        static struct change_member *change_point[2*E820_X_MAX] __initdata;
 232        static struct e820entry *overlap_list[E820_X_MAX] __initdata;
 233        static struct e820entry new_bios[E820_X_MAX] __initdata;
 234        struct change_member *change_tmp;
 235        unsigned long current_type, last_type;
 236        unsigned long long last_addr;
 237        int chgidx, still_changing;
 238        int overlap_entries;
 239        int new_bios_entry;
 240        int old_nr, new_nr, chg_nr;
 241        int i;
 242
 243        /* if there's only one memory region, don't bother */
 244        if (*pnr_map < 2)
 245                return -1;
 246
 247        old_nr = *pnr_map;
 248        BUG_ON(old_nr > max_nr_map);
 249
 250        /* bail out if we find any unreasonable addresses in bios map */
 251        for (i = 0; i < old_nr; i++)
 252                if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
 253                        return -1;
 254
 255        /* create pointers for initial change-point information (for sorting) */
 256        for (i = 0; i < 2 * old_nr; i++)
 257                change_point[i] = &change_point_list[i];
 258
 259        /* record all known change-points (starting and ending addresses),
 260           omitting those that are for empty memory regions */
 261        chgidx = 0;
 262        for (i = 0; i < old_nr; i++)    {
 263                if (biosmap[i].size != 0) {
 264                        change_point[chgidx]->addr = biosmap[i].addr;
 265                        change_point[chgidx++]->pbios = &biosmap[i];
 266                        change_point[chgidx]->addr = biosmap[i].addr +
 267                                biosmap[i].size;
 268                        change_point[chgidx++]->pbios = &biosmap[i];
 269                }
 270        }
 271        chg_nr = chgidx;
 272
 273        /* sort change-point list by memory addresses (low -> high) */
 274        still_changing = 1;
 275        while (still_changing)  {
 276                still_changing = 0;
 277                for (i = 1; i < chg_nr; i++)  {
 278                        unsigned long long curaddr, lastaddr;
 279                        unsigned long long curpbaddr, lastpbaddr;
 280
 281                        curaddr = change_point[i]->addr;
 282                        lastaddr = change_point[i - 1]->addr;
 283                        curpbaddr = change_point[i]->pbios->addr;
 284                        lastpbaddr = change_point[i - 1]->pbios->addr;
 285
 286                        /*
 287                         * swap entries, when:
 288                         *
 289                         * curaddr > lastaddr or
 290                         * curaddr == lastaddr and curaddr == curpbaddr and
 291                         * lastaddr != lastpbaddr
 292                         */
 293                        if (curaddr < lastaddr ||
 294                            (curaddr == lastaddr && curaddr == curpbaddr &&
 295                             lastaddr != lastpbaddr)) {
 296                                change_tmp = change_point[i];
 297                                change_point[i] = change_point[i-1];
 298                                change_point[i-1] = change_tmp;
 299                                still_changing = 1;
 300                        }
 301                }
 302        }
 303
 304        /* create a new bios memory map, removing overlaps */
 305        overlap_entries = 0;     /* number of entries in the overlap table */
 306        new_bios_entry = 0;      /* index for creating new bios map entries */
 307        last_type = 0;           /* start with undefined memory type */
 308        last_addr = 0;           /* start with 0 as last starting address */
 309
 310        /* loop through change-points, determining affect on the new bios map */
 311        for (chgidx = 0; chgidx < chg_nr; chgidx++) {
 312                /* keep track of all overlapping bios entries */
 313                if (change_point[chgidx]->addr ==
 314                    change_point[chgidx]->pbios->addr) {
 315                        /*
 316                         * add map entry to overlap list (> 1 entry
 317                         * implies an overlap)
 318                         */
 319                        overlap_list[overlap_entries++] =
 320                                change_point[chgidx]->pbios;
 321                } else {
 322                        /*
 323                         * remove entry from list (order independent,
 324                         * so swap with last)
 325                         */
 326                        for (i = 0; i < overlap_entries; i++) {
 327                                if (overlap_list[i] ==
 328                                    change_point[chgidx]->pbios)
 329                                        overlap_list[i] =
 330                                                overlap_list[overlap_entries-1];
 331                        }
 332                        overlap_entries--;
 333                }
 334                /*
 335                 * if there are overlapping entries, decide which
 336                 * "type" to use (larger value takes precedence --
 337                 * 1=usable, 2,3,4,4+=unusable)
 338                 */
 339                current_type = 0;
 340                for (i = 0; i < overlap_entries; i++)
 341                        if (overlap_list[i]->type > current_type)
 342                                current_type = overlap_list[i]->type;
 343                /*
 344                 * continue building up new bios map based on this
 345                 * information
 346                 */
 347                if (current_type != last_type)  {
 348                        if (last_type != 0)      {
 349                                new_bios[new_bios_entry].size =
 350                                        change_point[chgidx]->addr - last_addr;
 351                                /*
 352                                 * move forward only if the new size
 353                                 * was non-zero
 354                                 */
 355                                if (new_bios[new_bios_entry].size != 0)
 356                                        /*
 357                                         * no more space left for new
 358                                         * bios entries ?
 359                                         */
 360                                        if (++new_bios_entry >= max_nr_map)
 361                                                break;
 362                        }
 363                        if (current_type != 0)  {
 364                                new_bios[new_bios_entry].addr =
 365                                        change_point[chgidx]->addr;
 366                                new_bios[new_bios_entry].type = current_type;
 367                                last_addr = change_point[chgidx]->addr;
 368                        }
 369                        last_type = current_type;
 370                }
 371        }
 372        /* retain count for new bios entries */
 373        new_nr = new_bios_entry;
 374
 375        /* copy new bios mapping into original location */
 376        memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
 377        *pnr_map = new_nr;
 378
 379        return 0;
 380}
 381
 382static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
 383{
 384        while (nr_map) {
 385                u64 start = biosmap->addr;
 386                u64 size = biosmap->size;
 387                u64 end = start + size;
 388                u32 type = biosmap->type;
 389
 390                /* Overflow in 64 bits? Ignore the memory map. */
 391                if (start > end)
 392                        return -1;
 393
 394                e820_add_region(start, size, type);
 395
 396                biosmap++;
 397                nr_map--;
 398        }
 399        return 0;
 400}
 401
 402/*
 403 * Copy the BIOS e820 map into a safe place.
 404 *
 405 * Sanity-check it while we're at it..
 406 *
 407 * If we're lucky and live on a modern system, the setup code
 408 * will have given us a memory map that we can use to properly
 409 * set up memory.  If we aren't, we'll fake a memory map.
 410 */
 411static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
 412{
 413        /* Only one memory region (or negative)? Ignore it */
 414        if (nr_map < 2)
 415                return -1;
 416
 417        return __append_e820_map(biosmap, nr_map);
 418}
 419
 420static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
 421                                        u64 size, unsigned old_type,
 422                                        unsigned new_type)
 423{
 424        int i;
 425        u64 real_updated_size = 0;
 426
 427        BUG_ON(old_type == new_type);
 428
 429        if (size > (ULLONG_MAX - start))
 430                size = ULLONG_MAX - start;
 431
 432        for (i = 0; i < e820.nr_map; i++) {
 433                struct e820entry *ei = &e820x->map[i];
 434                u64 final_start, final_end;
 435                if (ei->type != old_type)
 436                        continue;
 437                /* totally covered? */
 438                if (ei->addr >= start &&
 439                    (ei->addr + ei->size) <= (start + size)) {
 440                        ei->type = new_type;
 441                        real_updated_size += ei->size;
 442                        continue;
 443                }
 444                /* partially covered */
 445                final_start = max(start, ei->addr);
 446                final_end = min(start + size, ei->addr + ei->size);
 447                if (final_start >= final_end)
 448                        continue;
 449                e820_add_region(final_start, final_end - final_start,
 450                                         new_type);
 451                real_updated_size += final_end - final_start;
 452
 453                ei->size -= final_end - final_start;
 454                if (ei->addr < final_start)
 455                        continue;
 456                ei->addr = final_end;
 457        }
 458        return real_updated_size;
 459}
 460
 461u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 462                             unsigned new_type)
 463{
 464        return e820_update_range_map(&e820, start, size, old_type, new_type);
 465}
 466
 467static u64 __init e820_update_range_saved(u64 start, u64 size,
 468                                          unsigned old_type, unsigned new_type)
 469{
 470        return e820_update_range_map(&e820_saved, start, size, old_type,
 471                                     new_type);
 472}
 473
 474/* make e820 not cover the range */
 475u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
 476                             int checktype)
 477{
 478        int i;
 479        u64 real_removed_size = 0;
 480
 481        if (size > (ULLONG_MAX - start))
 482                size = ULLONG_MAX - start;
 483
 484        for (i = 0; i < e820.nr_map; i++) {
 485                struct e820entry *ei = &e820.map[i];
 486                u64 final_start, final_end;
 487
 488                if (checktype && ei->type != old_type)
 489                        continue;
 490                /* totally covered? */
 491                if (ei->addr >= start &&
 492                    (ei->addr + ei->size) <= (start + size)) {
 493                        real_removed_size += ei->size;
 494                        memset(ei, 0, sizeof(struct e820entry));
 495                        continue;
 496                }
 497                /* partially covered */
 498                final_start = max(start, ei->addr);
 499                final_end = min(start + size, ei->addr + ei->size);
 500                if (final_start >= final_end)
 501                        continue;
 502                real_removed_size += final_end - final_start;
 503
 504                ei->size -= final_end - final_start;
 505                if (ei->addr < final_start)
 506                        continue;
 507                ei->addr = final_end;
 508        }
 509        return real_removed_size;
 510}
 511
 512void __init update_e820(void)
 513{
 514        int nr_map;
 515
 516        nr_map = e820.nr_map;
 517        if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
 518                return;
 519        e820.nr_map = nr_map;
 520        printk(KERN_INFO "modified physical RAM map:\n");
 521        e820_print_map("modified");
 522}
 523static void __init update_e820_saved(void)
 524{
 525        int nr_map;
 526
 527        nr_map = e820_saved.nr_map;
 528        if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
 529                return;
 530        e820_saved.nr_map = nr_map;
 531}
 532#define MAX_GAP_END 0x100000000ull
 533/*
 534 * Search for a gap in the e820 memory space from start_addr to end_addr.
 535 */
 536__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
 537                unsigned long start_addr, unsigned long long end_addr)
 538{
 539        unsigned long long last;
 540        int i = e820.nr_map;
 541        int found = 0;
 542
 543        last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
 544
 545        while (--i >= 0) {
 546                unsigned long long start = e820.map[i].addr;
 547                unsigned long long end = start + e820.map[i].size;
 548
 549                if (end < start_addr)
 550                        continue;
 551
 552                /*
 553                 * Since "last" is at most 4GB, we know we'll
 554                 * fit in 32 bits if this condition is true
 555                 */
 556                if (last > end) {
 557                        unsigned long gap = last - end;
 558
 559                        if (gap >= *gapsize) {
 560                                *gapsize = gap;
 561                                *gapstart = end;
 562                                found = 1;
 563                        }
 564                }
 565                if (start < last)
 566                        last = start;
 567        }
 568        return found;
 569}
 570
 571/*
 572 * Search for the biggest gap in the low 32 bits of the e820
 573 * memory space.  We pass this space to PCI to assign MMIO resources
 574 * for hotplug or unconfigured devices in.
 575 * Hopefully the BIOS let enough space left.
 576 */
 577__init void e820_setup_gap(void)
 578{
 579        unsigned long gapstart, gapsize, round;
 580        int found;
 581
 582        gapstart = 0x10000000;
 583        gapsize = 0x400000;
 584        found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
 585
 586#ifdef CONFIG_X86_64
 587        if (!found) {
 588                gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
 589                printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
 590                       "address range\n"
 591                       KERN_ERR "PCI: Unassigned devices with 32bit resource "
 592                       "registers may break!\n");
 593        }
 594#endif
 595
 596        /*
 597         * See how much we want to round up: start off with
 598         * rounding to the next 1MB area.
 599         */
 600        round = 0x100000;
 601        while ((gapsize >> 4) > round)
 602                round += round;
 603        /* Fun with two's complement */
 604        pci_mem_start = (gapstart + round) & -round;
 605
 606        printk(KERN_INFO
 607               "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
 608               pci_mem_start, gapstart, gapsize);
 609}
 610
 611/**
 612 * Because of the size limitation of struct boot_params, only first
 613 * 128 E820 memory entries are passed to kernel via
 614 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
 615 * linked list of struct setup_data, which is parsed here.
 616 */
 617void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
 618{
 619        u32 map_len;
 620        int entries;
 621        struct e820entry *extmap;
 622
 623        entries = sdata->len / sizeof(struct e820entry);
 624        map_len = sdata->len + sizeof(struct setup_data);
 625        if (map_len > PAGE_SIZE)
 626                sdata = early_ioremap(pa_data, map_len);
 627        extmap = (struct e820entry *)(sdata->data);
 628        __append_e820_map(extmap, entries);
 629        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 630        if (map_len > PAGE_SIZE)
 631                early_iounmap(sdata, map_len);
 632        printk(KERN_INFO "extended physical RAM map:\n");
 633        e820_print_map("extended");
 634}
 635
 636#if defined(CONFIG_X86_64) || \
 637        (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
 638/**
 639 * Find the ranges of physical addresses that do not correspond to
 640 * e820 RAM areas and mark the corresponding pages as nosave for
 641 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 642 *
 643 * This function requires the e820 map to be sorted and without any
 644 * overlapping entries and assumes the first e820 area to be RAM.
 645 */
 646void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 647{
 648        int i;
 649        unsigned long pfn;
 650
 651        pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
 652        for (i = 1; i < e820.nr_map; i++) {
 653                struct e820entry *ei = &e820.map[i];
 654
 655                if (pfn < PFN_UP(ei->addr))
 656                        register_nosave_region(pfn, PFN_UP(ei->addr));
 657
 658                pfn = PFN_DOWN(ei->addr + ei->size);
 659                if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
 660                        register_nosave_region(PFN_UP(ei->addr), pfn);
 661
 662                if (pfn >= limit_pfn)
 663                        break;
 664        }
 665}
 666#endif
 667
 668/*
 669 * Early reserved memory areas.
 670 */
 671#define MAX_EARLY_RES 20
 672
 673struct early_res {
 674        u64 start, end;
 675        char name[16];
 676        char overlap_ok;
 677};
 678static struct early_res early_res[MAX_EARLY_RES] __initdata = {
 679        { 0, PAGE_SIZE, "BIOS data page" },     /* BIOS data page */
 680#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
 681        { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
 682#endif
 683#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
 684        /*
 685         * But first pinch a few for the stack/trampoline stuff
 686         * FIXME: Don't need the extra page at 4K, but need to fix
 687         * trampoline before removing it. (see the GDT stuff)
 688         */
 689        { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
 690        /*
 691         * Has to be in very low memory so we can execute
 692         * real-mode AP code.
 693         */
 694        { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
 695#endif
 696        {}
 697};
 698
 699static int __init find_overlapped_early(u64 start, u64 end)
 700{
 701        int i;
 702        struct early_res *r;
 703
 704        for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 705                r = &early_res[i];
 706                if (end > r->start && start < r->end)
 707                        break;
 708        }
 709
 710        return i;
 711}
 712
 713/*
 714 * Drop the i-th range from the early reservation map,
 715 * by copying any higher ranges down one over it, and
 716 * clearing what had been the last slot.
 717 */
 718static void __init drop_range(int i)
 719{
 720        int j;
 721
 722        for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
 723                ;
 724
 725        memmove(&early_res[i], &early_res[i + 1],
 726               (j - 1 - i) * sizeof(struct early_res));
 727
 728        early_res[j - 1].end = 0;
 729}
 730
 731/*
 732 * Split any existing ranges that:
 733 *  1) are marked 'overlap_ok', and
 734 *  2) overlap with the stated range [start, end)
 735 * into whatever portion (if any) of the existing range is entirely
 736 * below or entirely above the stated range.  Drop the portion
 737 * of the existing range that overlaps with the stated range,
 738 * which will allow the caller of this routine to then add that
 739 * stated range without conflicting with any existing range.
 740 */
 741static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
 742{
 743        int i;
 744        struct early_res *r;
 745        u64 lower_start, lower_end;
 746        u64 upper_start, upper_end;
 747        char name[16];
 748
 749        for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 750                r = &early_res[i];
 751
 752                /* Continue past non-overlapping ranges */
 753                if (end <= r->start || start >= r->end)
 754                        continue;
 755
 756                /*
 757                 * Leave non-ok overlaps as is; let caller
 758                 * panic "Overlapping early reservations"
 759                 * when it hits this overlap.
 760                 */
 761                if (!r->overlap_ok)
 762                        return;
 763
 764                /*
 765                 * We have an ok overlap.  We will drop it from the early
 766                 * reservation map, and add back in any non-overlapping
 767                 * portions (lower or upper) as separate, overlap_ok,
 768                 * non-overlapping ranges.
 769                 */
 770
 771                /* 1. Note any non-overlapping (lower or upper) ranges. */
 772                strncpy(name, r->name, sizeof(name) - 1);
 773
 774                lower_start = lower_end = 0;
 775                upper_start = upper_end = 0;
 776                if (r->start < start) {
 777                        lower_start = r->start;
 778                        lower_end = start;
 779                }
 780                if (r->end > end) {
 781                        upper_start = end;
 782                        upper_end = r->end;
 783                }
 784
 785                /* 2. Drop the original ok overlapping range */
 786                drop_range(i);
 787
 788                i--;            /* resume for-loop on copied down entry */
 789
 790                /* 3. Add back in any non-overlapping ranges. */
 791                if (lower_end)
 792                        reserve_early_overlap_ok(lower_start, lower_end, name);
 793                if (upper_end)
 794                        reserve_early_overlap_ok(upper_start, upper_end, name);
 795        }
 796}
 797
 798static void __init __reserve_early(u64 start, u64 end, char *name,
 799                                                int overlap_ok)
 800{
 801        int i;
 802        struct early_res *r;
 803
 804        i = find_overlapped_early(start, end);
 805        if (i >= MAX_EARLY_RES)
 806                panic("Too many early reservations");
 807        r = &early_res[i];
 808        if (r->end)
 809                panic("Overlapping early reservations "
 810                      "%llx-%llx %s to %llx-%llx %s\n",
 811                      start, end - 1, name?name:"", r->start,
 812                      r->end - 1, r->name);
 813        r->start = start;
 814        r->end = end;
 815        r->overlap_ok = overlap_ok;
 816        if (name)
 817                strncpy(r->name, name, sizeof(r->name) - 1);
 818}
 819
 820/*
 821 * A few early reservtations come here.
 822 *
 823 * The 'overlap_ok' in the name of this routine does -not- mean it
 824 * is ok for these reservations to overlap an earlier reservation.
 825 * Rather it means that it is ok for subsequent reservations to
 826 * overlap this one.
 827 *
 828 * Use this entry point to reserve early ranges when you are doing
 829 * so out of "Paranoia", reserving perhaps more memory than you need,
 830 * just in case, and don't mind a subsequent overlapping reservation
 831 * that is known to be needed.
 832 *
 833 * The drop_overlaps_that_are_ok() call here isn't really needed.
 834 * It would be needed if we had two colliding 'overlap_ok'
 835 * reservations, so that the second such would not panic on the
 836 * overlap with the first.  We don't have any such as of this
 837 * writing, but might as well tolerate such if it happens in
 838 * the future.
 839 */
 840void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
 841{
 842        drop_overlaps_that_are_ok(start, end);
 843        __reserve_early(start, end, name, 1);
 844}
 845
 846/*
 847 * Most early reservations come here.
 848 *
 849 * We first have drop_overlaps_that_are_ok() drop any pre-existing
 850 * 'overlap_ok' ranges, so that we can then reserve this memory
 851 * range without risk of panic'ing on an overlapping overlap_ok
 852 * early reservation.
 853 */
 854void __init reserve_early(u64 start, u64 end, char *name)
 855{
 856        drop_overlaps_that_are_ok(start, end);
 857        __reserve_early(start, end, name, 0);
 858}
 859
 860void __init free_early(u64 start, u64 end)
 861{
 862        struct early_res *r;
 863        int i;
 864
 865        i = find_overlapped_early(start, end);
 866        r = &early_res[i];
 867        if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
 868                panic("free_early on not reserved area: %llx-%llx!",
 869                         start, end - 1);
 870
 871        drop_range(i);
 872}
 873
 874void __init early_res_to_bootmem(u64 start, u64 end)
 875{
 876        int i, count;
 877        u64 final_start, final_end;
 878
 879        count  = 0;
 880        for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
 881                count++;
 882
 883        printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
 884                         count, start, end);
 885        for (i = 0; i < count; i++) {
 886                struct early_res *r = &early_res[i];
 887                printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
 888                        r->start, r->end, r->name);
 889                final_start = max(start, r->start);
 890                final_end = min(end, r->end);
 891                if (final_start >= final_end) {
 892                        printk(KERN_CONT "\n");
 893                        continue;
 894                }
 895                printk(KERN_CONT " ==> [%010llx - %010llx]\n",
 896                        final_start, final_end);
 897                reserve_bootmem_generic(final_start, final_end - final_start,
 898                                BOOTMEM_DEFAULT);
 899        }
 900}
 901
 902/* Check for already reserved areas */
 903static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
 904{
 905        int i;
 906        u64 addr = *addrp;
 907        int changed = 0;
 908        struct early_res *r;
 909again:
 910        i = find_overlapped_early(addr, addr + size);
 911        r = &early_res[i];
 912        if (i < MAX_EARLY_RES && r->end) {
 913                *addrp = addr = round_up(r->end, align);
 914                changed = 1;
 915                goto again;
 916        }
 917        return changed;
 918}
 919
 920/* Check for already reserved areas */
 921static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
 922{
 923        int i;
 924        u64 addr = *addrp, last;
 925        u64 size = *sizep;
 926        int changed = 0;
 927again:
 928        last = addr + size;
 929        for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 930                struct early_res *r = &early_res[i];
 931                if (last > r->start && addr < r->start) {
 932                        size = r->start - addr;
 933                        changed = 1;
 934                        goto again;
 935                }
 936                if (last > r->end && addr < r->end) {
 937                        addr = round_up(r->end, align);
 938                        size = last - addr;
 939                        changed = 1;
 940                        goto again;
 941                }
 942                if (last <= r->end && addr >= r->start) {
 943                        (*sizep)++;
 944                        return 0;
 945                }
 946        }
 947        if (changed) {
 948                *addrp = addr;
 949                *sizep = size;
 950        }
 951        return changed;
 952}
 953
 954/*
 955 * Find a free area with specified alignment in a specific range.
 956 */
 957u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
 958{
 959        int i;
 960
 961        for (i = 0; i < e820.nr_map; i++) {
 962                struct e820entry *ei = &e820.map[i];
 963                u64 addr, last;
 964                u64 ei_last;
 965
 966                if (ei->type != E820_RAM)
 967                        continue;
 968                addr = round_up(ei->addr, align);
 969                ei_last = ei->addr + ei->size;
 970                if (addr < start)
 971                        addr = round_up(start, align);
 972                if (addr >= ei_last)
 973                        continue;
 974                while (bad_addr(&addr, size, align) && addr+size <= ei_last)
 975                        ;
 976                last = addr + size;
 977                if (last > ei_last)
 978                        continue;
 979                if (last > end)
 980                        continue;
 981                return addr;
 982        }
 983        return -1ULL;
 984}
 985
 986/*
 987 * Find next free range after *start
 988 */
 989u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 990{
 991        int i;
 992
 993        for (i = 0; i < e820.nr_map; i++) {
 994                struct e820entry *ei = &e820.map[i];
 995                u64 addr, last;
 996                u64 ei_last;
 997
 998                if (ei->type != E820_RAM)
 999                        continue;
1000                addr = round_up(ei->addr, align);
1001                ei_last = ei->addr + ei->size;
1002                if (addr < start)
1003                        addr = round_up(start, align);
1004                if (addr >= ei_last)
1005                        continue;
1006                *sizep = ei_last - addr;
1007                while (bad_addr_size(&addr, sizep, align) &&
1008                        addr + *sizep <= ei_last)
1009                        ;
1010                last = addr + *sizep;
1011                if (last > ei_last)
1012                        continue;
1013                return addr;
1014        }
1015        return -1UL;
1016
1017}
1018
1019/*
1020 * pre allocated 4k and reserved it in e820
1021 */
1022u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1023{
1024        u64 size = 0;
1025        u64 addr;
1026        u64 start;
1027
1028        start = startt;
1029        while (size < sizet)
1030                start = find_e820_area_size(start, &size, align);
1031
1032        if (size < sizet)
1033                return 0;
1034
1035        addr = round_down(start + size - sizet, align);
1036        e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
1037        e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
1038        printk(KERN_INFO "update e820 for early_reserve_e820\n");
1039        update_e820();
1040        update_e820_saved();
1041
1042        return addr;
1043}
1044
/*
 * Upper bound on page frame numbers for this build: 2^36 bytes worth
 * of pages on 32-bit with PAE, 2^32 bytes worth on 32-bit without PAE,
 * and MAXMEM worth otherwise.
 *
 * Every expansion is fully parenthesized so that the macro can be
 * used inside larger expressions without precedence surprises.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN		((MAXMEM)>>(PAGE_SHIFT))
#endif
1054
1055/*
1056 * Find the highest page frame number we have available
1057 */
1058static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
1059{
1060        int i;
1061        unsigned long last_pfn = 0;
1062        unsigned long max_arch_pfn = MAX_ARCH_PFN;
1063
1064        for (i = 0; i < e820.nr_map; i++) {
1065                struct e820entry *ei = &e820.map[i];
1066                unsigned long start_pfn;
1067                unsigned long end_pfn;
1068
1069                if (ei->type != type)
1070                        continue;
1071
1072                start_pfn = ei->addr >> PAGE_SHIFT;
1073                end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
1074
1075                if (start_pfn >= limit_pfn)
1076                        continue;
1077                if (end_pfn > limit_pfn) {
1078                        last_pfn = limit_pfn;
1079                        break;
1080                }
1081                if (end_pfn > last_pfn)
1082                        last_pfn = end_pfn;
1083        }
1084
1085        if (last_pfn > max_arch_pfn)
1086                last_pfn = max_arch_pfn;
1087
1088        printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
1089                         last_pfn, max_arch_pfn);
1090        return last_pfn;
1091}
1092unsigned long __init e820_end_of_ram_pfn(void)
1093{
1094        return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
1095}
1096
1097unsigned long __init e820_end_of_low_ram_pfn(void)
1098{
1099        return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
1100}
1101/*
1102 * Finds an active region in the address range from start_pfn to last_pfn and
1103 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
1104 */
1105int __init e820_find_active_region(const struct e820entry *ei,
1106                                  unsigned long start_pfn,
1107                                  unsigned long last_pfn,
1108                                  unsigned long *ei_startpfn,
1109                                  unsigned long *ei_endpfn)
1110{
1111        u64 align = PAGE_SIZE;
1112
1113        *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
1114        *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
1115
1116        /* Skip map entries smaller than a page */
1117        if (*ei_startpfn >= *ei_endpfn)
1118                return 0;
1119
1120        /* Skip if map is outside the node */
1121        if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
1122                                    *ei_startpfn >= last_pfn)
1123                return 0;
1124
1125        /* Check for overlaps */
1126        if (*ei_startpfn < start_pfn)
1127                *ei_startpfn = start_pfn;
1128        if (*ei_endpfn > last_pfn)
1129                *ei_endpfn = last_pfn;
1130
1131        return 1;
1132}
1133
1134/* Walk the e820 map and register active regions within a node */
1135void __init e820_register_active_regions(int nid, unsigned long start_pfn,
1136                                         unsigned long last_pfn)
1137{
1138        unsigned long ei_startpfn;
1139        unsigned long ei_endpfn;
1140        int i;
1141
1142        for (i = 0; i < e820.nr_map; i++)
1143                if (e820_find_active_region(&e820.map[i],
1144                                            start_pfn, last_pfn,
1145                                            &ei_startpfn, &ei_endpfn))
1146                        add_active_range(nid, ei_startpfn, ei_endpfn);
1147}
1148
1149/*
1150 * Find the hole size (in bytes) in the memory range.
1151 * @start: starting address of the memory range to scan
1152 * @end: ending address of the memory range to scan
1153 */
1154u64 __init e820_hole_size(u64 start, u64 end)
1155{
1156        unsigned long start_pfn = start >> PAGE_SHIFT;
1157        unsigned long last_pfn = end >> PAGE_SHIFT;
1158        unsigned long ei_startpfn, ei_endpfn, ram = 0;
1159        int i;
1160
1161        for (i = 0; i < e820.nr_map; i++) {
1162                if (e820_find_active_region(&e820.map[i],
1163                                            start_pfn, last_pfn,
1164                                            &ei_startpfn, &ei_endpfn))
1165                        ram += ei_endpfn - ei_startpfn;
1166        }
1167        return end - start - ((u64)ram << PAGE_SHIFT);
1168}
1169
/*
 * Emit 'msg' through early_printk() and then panic() with it.
 *
 * The message is passed as data to a "%s" format rather than as the
 * format string itself, so a '%' in the text can never be interpreted
 * as a conversion specifier.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
1175
1176static int userdef __initdata;
1177
1178/* "mem=nopentium" disables the 4MB page tables. */
1179static int __init parse_memopt(char *p)
1180{
1181        u64 mem_size;
1182
1183        if (!p)
1184                return -EINVAL;
1185
1186#ifdef CONFIG_X86_32
1187        if (!strcmp(p, "nopentium")) {
1188                setup_clear_cpu_cap(X86_FEATURE_PSE);
1189                return 0;
1190        }
1191#endif
1192
1193        userdef = 1;
1194        mem_size = memparse(p, &p);
1195        e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1196
1197        return 0;
1198}
1199early_param("mem", parse_memopt);
1200
1201static int __init parse_memmap_opt(char *p)
1202{
1203        char *oldp;
1204        u64 start_at, mem_size;
1205
1206        if (!p)
1207                return -EINVAL;
1208
1209        if (!strncmp(p, "exactmap", 8)) {
1210#ifdef CONFIG_CRASH_DUMP
1211                /*
1212                 * If we are doing a crash dump, we still need to know
1213                 * the real mem size before original memory map is
1214                 * reset.
1215                 */
1216                saved_max_pfn = e820_end_of_ram_pfn();
1217#endif
1218                e820.nr_map = 0;
1219                userdef = 1;
1220                return 0;
1221        }
1222
1223        oldp = p;
1224        mem_size = memparse(p, &p);
1225        if (p == oldp)
1226                return -EINVAL;
1227
1228        userdef = 1;
1229        if (*p == '@') {
1230                start_at = memparse(p+1, &p);
1231                e820_add_region(start_at, mem_size, E820_RAM);
1232        } else if (*p == '#') {
1233                start_at = memparse(p+1, &p);
1234                e820_add_region(start_at, mem_size, E820_ACPI);
1235        } else if (*p == '$') {
1236                start_at = memparse(p+1, &p);
1237                e820_add_region(start_at, mem_size, E820_RESERVED);
1238        } else
1239                e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1240
1241        return *p == '\0' ? 0 : -EINVAL;
1242}
1243early_param("memmap", parse_memmap_opt);
1244
1245void __init finish_e820_parsing(void)
1246{
1247        if (userdef) {
1248                int nr = e820.nr_map;
1249
1250                if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
1251                        early_panic("Invalid user supplied memory map");
1252                e820.nr_map = nr;
1253
1254                printk(KERN_INFO "user-defined physical RAM map:\n");
1255                e820_print_map("user");
1256        }
1257}
1258
1259static inline const char *e820_type_to_string(int e820_type)
1260{
1261        switch (e820_type) {
1262        case E820_RESERVED_KERN:
1263        case E820_RAM:  return "System RAM";
1264        case E820_ACPI: return "ACPI Tables";
1265        case E820_NVS:  return "ACPI Non-volatile Storage";
1266        case E820_UNUSABLE:     return "Unusable memory";
1267        default:        return "reserved";
1268        }
1269}
1270
1271/*
1272 * Mark e820 reserved areas as busy for the resource manager.
1273 */
1274static struct resource __initdata *e820_res;
1275void __init e820_reserve_resources(void)
1276{
1277        int i;
1278        struct resource *res;
1279        u64 end;
1280
1281        res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1282        e820_res = res;
1283        for (i = 0; i < e820.nr_map; i++) {
1284                end = e820.map[i].addr + e820.map[i].size - 1;
1285                if (end != (resource_size_t)end) {
1286                        res++;
1287                        continue;
1288                }
1289                res->name = e820_type_to_string(e820.map[i].type);
1290                res->start = e820.map[i].addr;
1291                res->end = end;
1292
1293                res->flags = IORESOURCE_MEM;
1294
1295                /*
1296                 * don't register the region that could be conflicted with
1297                 * pci device BAR resource and insert them later in
1298                 * pcibios_resource_survey()
1299                 */
1300                if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
1301                        res->flags |= IORESOURCE_BUSY;
1302                        insert_resource(&iomem_resource, res);
1303                }
1304                res++;
1305        }
1306
1307        for (i = 0; i < e820_saved.nr_map; i++) {
1308                struct e820entry *entry = &e820_saved.map[i];
1309                firmware_map_add_early(entry->addr,
1310                        entry->addr + entry->size - 1,
1311                        e820_type_to_string(entry->type));
1312        }
1313}
1314
1315void __init e820_reserve_resources_late(void)
1316{
1317        int i;
1318        struct resource *res;
1319
1320        res = e820_res;
1321        for (i = 0; i < e820.nr_map; i++) {
1322                if (!res->parent && res->end)
1323                        insert_resource_expand_to_fit(&iomem_resource, res);
1324                res++;
1325        }
1326}
1327
1328char *__init default_machine_specific_memory_setup(void)
1329{
1330        char *who = "BIOS-e820";
1331        int new_nr;
1332        /*
1333         * Try to copy the BIOS-supplied E820-map.
1334         *
1335         * Otherwise fake a memory map; one section from 0k->640k,
1336         * the next section from 1mb->appropriate_mem_k
1337         */
1338        new_nr = boot_params.e820_entries;
1339        sanitize_e820_map(boot_params.e820_map,
1340                        ARRAY_SIZE(boot_params.e820_map),
1341                        &new_nr);
1342        boot_params.e820_entries = new_nr;
1343        if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
1344          < 0) {
1345                u64 mem_size;
1346
1347                /* compare results from other methods and take the greater */
1348                if (boot_params.alt_mem_k
1349                    < boot_params.screen_info.ext_mem_k) {
1350                        mem_size = boot_params.screen_info.ext_mem_k;
1351                        who = "BIOS-88";
1352                } else {
1353                        mem_size = boot_params.alt_mem_k;
1354                        who = "BIOS-e801";
1355                }
1356
1357                e820.nr_map = 0;
1358                e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1359                e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1360        }
1361
1362        /* In case someone cares... */
1363        return who;
1364}
1365
1366char *__init __attribute__((weak)) machine_specific_memory_setup(void)
1367{
1368        if (x86_quirks->arch_memory_setup) {
1369                char *who = x86_quirks->arch_memory_setup();
1370
1371                if (who)
1372                        return who;
1373        }
1374        return default_machine_specific_memory_setup();
1375}
1376
1377/* Overridden in paravirt.c if CONFIG_PARAVIRT */
1378char * __init __attribute__((weak)) memory_setup(void)
1379{
1380        return machine_specific_memory_setup();
1381}
1382
1383void __init setup_memory_map(void)
1384{
1385        char *who;
1386
1387        who = memory_setup();
1388        memcpy(&e820_saved, &e820, sizeof(struct e820map));
1389        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1390        e820_print_map(who);
1391}
1392
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.