linux-bk/arch/x86_64/kernel/e820.c
<<
>>
Prefs
   1/* 
   2 * Handle the memory map.
   3 * The functions here do the job until bootmem takes over.
   4 * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
   5 */
   6#include <linux/config.h>
   7#include <linux/kernel.h>
   8#include <linux/types.h>
   9#include <linux/init.h>
  10#include <linux/bootmem.h>
  11#include <linux/ioport.h>
  12#include <linux/string.h>
  13#include <asm/page.h>
  14#include <asm/e820.h>
  15#include <asm/proto.h>
  16#include <asm/bootsetup.h>
  17
  18extern char _end[];
  19
  20/* 
  21 * PFN of last memory page.
  22 */
  23unsigned long end_pfn; 
  24
  25/* 
  26 * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
  27 * The direct mapping extends to end_pfn_map, so that we can directly access
  28 * apertures, ACPI and other tables without having to play with fixmaps.
  29 */ 
  30unsigned long end_pfn_map; 
  31
  32/* 
  33 * Last pfn which the user wants to use.
  34 */
  35unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
  36
  37extern struct resource code_resource, data_resource;
  38
  39/* Check for some hardcoded bad areas that early boot is not allowed to touch */ 
  40static inline int bad_addr(unsigned long *addrp, unsigned long size)
  41{ 
  42        unsigned long addr = *addrp, last = addr + size; 
  43
  44        /* various gunk below that needed for SMP startup */
  45        if (addr < 0x8000) { 
  46                *addrp = 0x8000;
  47                return 1; 
  48        }
  49
  50        /* direct mapping tables of the kernel */
  51        if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { 
  52                *addrp = table_end << PAGE_SHIFT; 
  53                return 1;
  54        } 
  55
  56        /* initrd */ 
  57#ifdef CONFIG_BLK_DEV_INITRD
  58        if (LOADER_TYPE && INITRD_START && last >= INITRD_START && 
  59            addr < INITRD_START+INITRD_SIZE) { 
  60                *addrp = INITRD_START + INITRD_SIZE; 
  61                return 1;
  62        } 
  63#endif
  64        /* kernel code + 640k memory hole (later should not be needed, but 
  65           be paranoid for now) */
  66        if (last >= 640*1024 && addr < __pa_symbol(&_end)) { 
  67                *addrp = __pa_symbol(&_end);
  68                return 1;
  69        }
  70        /* XXX ramdisk image here? */ 
  71        return 0;
  72} 
  73
  74int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) 
  75{ 
  76        int i;
  77        for (i = 0; i < e820.nr_map; i++) { 
  78                struct e820entry *ei = &e820.map[i]; 
  79                if (type && ei->type != type) 
  80                        continue;
  81                if (ei->addr >= end || ei->addr + ei->size < start) 
  82                        continue; 
  83                return 1; 
  84        } 
  85        return 0;
  86}
  87
  88/* 
  89 * Find a free area in a specific range. 
  90 */ 
  91unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) 
  92{ 
  93        int i; 
  94        for (i = 0; i < e820.nr_map; i++) { 
  95                struct e820entry *ei = &e820.map[i]; 
  96                unsigned long addr = ei->addr, last; 
  97                if (ei->type != E820_RAM) 
  98                        continue; 
  99                if (addr < start) 
 100                        addr = start;
 101                if (addr > ei->addr + ei->size) 
 102                        continue; 
 103                while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size)
 104                        ;
 105                last = addr + size;
 106                if (last > ei->addr + ei->size)
 107                        continue;
 108                if (last > end) 
 109                        continue;
 110                return addr; 
 111        } 
 112        return -1UL;            
 113} 
 114
 115/* 
 116 * Free bootmem based on the e820 table for a node.
 117 */
 118void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
 119{
 120        int i;
 121        for (i = 0; i < e820.nr_map; i++) {
 122                struct e820entry *ei = &e820.map[i]; 
 123                unsigned long last, addr;
 124
 125                if (ei->type != E820_RAM || 
 126                    ei->addr+ei->size <= start || 
 127                    ei->addr > end)
 128                        continue;
 129
 130                addr = round_up(ei->addr, PAGE_SIZE);
 131                if (addr < start) 
 132                        addr = start;
 133
 134                last = round_down(ei->addr + ei->size, PAGE_SIZE); 
 135                if (last >= end)
 136                        last = end; 
 137
 138                if (last > addr && last-addr >= PAGE_SIZE)
 139                        free_bootmem_node(pgdat, addr, last-addr);
 140        }
 141}
 142
 143/*
 144 * Find the highest page frame number we have available
 145 */
 146unsigned long __init e820_end_of_ram(void)
 147{
 148        int i;
 149        unsigned long end_pfn = 0;
 150        
 151        for (i = 0; i < e820.nr_map; i++) {
 152                struct e820entry *ei = &e820.map[i]; 
 153                unsigned long start, end;
 154
 155                start = round_up(ei->addr, PAGE_SIZE); 
 156                end = round_down(ei->addr + ei->size, PAGE_SIZE); 
 157                if (start >= end)
 158                        continue;
 159                if (ei->type == E820_RAM) { 
 160                if (end > end_pfn<<PAGE_SHIFT)
 161                        end_pfn = end>>PAGE_SHIFT;
 162                } else { 
 163                        if (end > end_pfn_map<<PAGE_SHIFT) 
 164                                end_pfn_map = end>>PAGE_SHIFT;
 165                } 
 166        }
 167
 168        if (end_pfn > end_pfn_map) 
 169                end_pfn_map = end_pfn;
 170        if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
 171                end_pfn_map = MAXMEM>>PAGE_SHIFT;
 172        if (end_pfn > end_user_pfn)
 173                end_pfn = end_user_pfn;
 174        if (end_pfn > end_pfn_map) 
 175                end_pfn = end_pfn_map; 
 176
 177        return end_pfn; 
 178}
 179
 180/* 
 181 * Mark e820 reserved areas as busy for the resource manager.
 182 */
 183void __init e820_reserve_resources(void)
 184{
 185        int i;
 186        for (i = 0; i < e820.nr_map; i++) {
 187                struct resource *res;
 188                if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
 189                        continue;
 190                res = alloc_bootmem_low(sizeof(struct resource));
 191                switch (e820.map[i].type) {
 192                case E820_RAM:  res->name = "System RAM"; break;
 193                case E820_ACPI: res->name = "ACPI Tables"; break;
 194                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
 195                default:        res->name = "reserved";
 196                }
 197                res->start = e820.map[i].addr;
 198                res->end = res->start + e820.map[i].size - 1;
 199                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 200                request_resource(&iomem_resource, res);
 201                if (e820.map[i].type == E820_RAM) {
 202                        /*
 203                         *  We don't know which RAM region contains kernel data,
 204                         *  so we try it repeatedly and let the resource manager
 205                         *  test it.
 206                         */
 207                        request_resource(res, &code_resource);
 208                        request_resource(res, &data_resource);
 209                }
 210        }
 211}
 212
 213/* 
 214 * Add a memory region to the kernel e820 map.
 215 */ 
 216void __init add_memory_region(unsigned long start, unsigned long size, int type)
 217{
 218        int x = e820.nr_map;
 219
 220        if (x == E820MAX) {
 221                printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 222                return;
 223        }
 224
 225        e820.map[x].addr = start;
 226        e820.map[x].size = size;
 227        e820.map[x].type = type;
 228        e820.nr_map++;
 229}
 230
 231void __init e820_print_map(char *who)
 232{
 233        int i;
 234
 235        for (i = 0; i < e820.nr_map; i++) {
 236                printk(" %s: %016Lx - %016Lx ", who,
 237                        (unsigned long long) e820.map[i].addr,
 238                        (unsigned long long) (e820.map[i].addr + e820.map[i].size));
 239                switch (e820.map[i].type) {
 240                case E820_RAM:  printk("(usable)\n");
 241                                break;
 242                case E820_RESERVED:
 243                                printk("(reserved)\n");
 244                                break;
 245                case E820_ACPI:
 246                                printk("(ACPI data)\n");
 247                                break;
 248                case E820_NVS:
 249                                printk("(ACPI NVS)\n");
 250                                break;
 251                default:        printk("type %u\n", e820.map[i].type);
 252                                break;
 253                }
 254        }
 255}
 256
 257/*
 258 * Sanitize the BIOS e820 map.
 259 *
 260 * Some e820 responses include overlapping entries.  The following 
 261 * replaces the original e820 map with a new one, removing overlaps.
 262 *
 263 */
 264static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 265{
 266        struct change_member {
 267                struct e820entry *pbios; /* pointer to original bios entry */
 268                unsigned long long addr; /* address for this change point */
 269        };
 270        static struct change_member change_point_list[2*E820MAX] __initdata;
 271        static struct change_member *change_point[2*E820MAX] __initdata;
 272        static struct e820entry *overlap_list[E820MAX] __initdata;
 273        static struct e820entry new_bios[E820MAX] __initdata;
 274        struct change_member *change_tmp;
 275        unsigned long current_type, last_type;
 276        unsigned long long last_addr;
 277        int chgidx, still_changing;
 278        int overlap_entries;
 279        int new_bios_entry;
 280        int old_nr, new_nr;
 281        int i;
 282
 283        /*
 284                Visually we're performing the following (1,2,3,4 = memory types)...
 285
 286                Sample memory map (w/overlaps):
 287                   ____22__________________
 288                   ______________________4_
 289                   ____1111________________
 290                   _44_____________________
 291                   11111111________________
 292                   ____________________33__
 293                   ___________44___________
 294                   __________33333_________
 295                   ______________22________
 296                   ___________________2222_
 297                   _________111111111______
 298                   _____________________11_
 299                   _________________4______
 300
 301                Sanitized equivalent (no overlap):
 302                   1_______________________
 303                   _44_____________________
 304                   ___1____________________
 305                   ____22__________________
 306                   ______11________________
 307                   _________1______________
 308                   __________3_____________
 309                   ___________44___________
 310                   _____________33_________
 311                   _______________2________
 312                   ________________1_______
 313                   _________________4______
 314                   ___________________2____
 315                   ____________________33__
 316                   ______________________4_
 317        */
 318
 319        /* if there's only one memory region, don't bother */
 320        if (*pnr_map < 2)
 321                return -1;
 322
 323        old_nr = *pnr_map;
 324
 325        /* bail out if we find any unreasonable addresses in bios map */
 326        for (i=0; i<old_nr; i++)
 327                if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
 328                        return -1;
 329
 330        /* create pointers for initial change-point information (for sorting) */
 331        for (i=0; i < 2*old_nr; i++)
 332                change_point[i] = &change_point_list[i];
 333
 334        /* record all known change-points (starting and ending addresses) */
 335        chgidx = 0;
 336        for (i=0; i < old_nr; i++)      {
 337                change_point[chgidx]->addr = biosmap[i].addr;
 338                change_point[chgidx++]->pbios = &biosmap[i];
 339                change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
 340                change_point[chgidx++]->pbios = &biosmap[i];
 341        }
 342
 343        /* sort change-point list by memory addresses (low -> high) */
 344        still_changing = 1;
 345        while (still_changing)  {
 346                still_changing = 0;
 347                for (i=1; i < 2*old_nr; i++)  {
 348                        /* if <current_addr> > <last_addr>, swap */
 349                        /* or, if current=<start_addr> & last=<end_addr>, swap */
 350                        if ((change_point[i]->addr < change_point[i-1]->addr) ||
 351                                ((change_point[i]->addr == change_point[i-1]->addr) &&
 352                                 (change_point[i]->addr == change_point[i]->pbios->addr) &&
 353                                 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
 354                           )
 355                        {
 356                                change_tmp = change_point[i];
 357                                change_point[i] = change_point[i-1];
 358                                change_point[i-1] = change_tmp;
 359                                still_changing=1;
 360                        }
 361                }
 362        }
 363
 364        /* create a new bios memory map, removing overlaps */
 365        overlap_entries=0;       /* number of entries in the overlap table */
 366        new_bios_entry=0;        /* index for creating new bios map entries */
 367        last_type = 0;           /* start with undefined memory type */
 368        last_addr = 0;           /* start with 0 as last starting address */
 369        /* loop through change-points, determining affect on the new bios map */
 370        for (chgidx=0; chgidx < 2*old_nr; chgidx++)
 371        {
 372                /* keep track of all overlapping bios entries */
 373                if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
 374                {
 375                        /* add map entry to overlap list (> 1 entry implies an overlap) */
 376                        overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
 377                }
 378                else
 379                {
 380                        /* remove entry from list (order independent, so swap with last) */
 381                        for (i=0; i<overlap_entries; i++)
 382                        {
 383                                if (overlap_list[i] == change_point[chgidx]->pbios)
 384                                        overlap_list[i] = overlap_list[overlap_entries-1];
 385                        }
 386                        overlap_entries--;
 387                }
 388                /* if there are overlapping entries, decide which "type" to use */
 389                /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
 390                current_type = 0;
 391                for (i=0; i<overlap_entries; i++)
 392                        if (overlap_list[i]->type > current_type)
 393                                current_type = overlap_list[i]->type;
 394                /* continue building up new bios map based on this information */
 395                if (current_type != last_type)  {
 396                        if (last_type != 0)      {
 397                                new_bios[new_bios_entry].size =
 398                                        change_point[chgidx]->addr - last_addr;
 399                                /* move forward only if the new size was non-zero */
 400                                if (new_bios[new_bios_entry].size != 0)
 401                                        if (++new_bios_entry >= E820MAX)
 402                                                break;  /* no more space left for new bios entries */
 403                        }
 404                        if (current_type != 0)  {
 405                                new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
 406                                new_bios[new_bios_entry].type = current_type;
 407                                last_addr=change_point[chgidx]->addr;
 408                        }
 409                        last_type = current_type;
 410                }
 411        }
 412        new_nr = new_bios_entry;   /* retain count for new bios entries */
 413
 414        /* copy new bios mapping into original location */
 415        memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
 416        *pnr_map = new_nr;
 417
 418        return 0;
 419}
 420
 421/*
 422 * Copy the BIOS e820 map into a safe place.
 423 *
 424 * Sanity-check it while we're at it..
 425 *
 426 * If we're lucky and live on a modern system, the setup code
 427 * will have given us a memory map that we can use to properly
 428 * set up memory.  If we aren't, we'll fake a memory map.
 429 *
 430 * We check to see that the memory map contains at least 2 elements
 431 * before we'll use it, because the detection code in setup.S may
 432 * not be perfect and most every PC known to man has two memory
 433 * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
 434 * thinkpad 560x, for example, does not cooperate with the memory
 435 * detection code.)
 436 */
 437static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 438{
 439        /* Only one memory region (or negative)? Ignore it */
 440        if (nr_map < 2)
 441                return -1;
 442
 443        do {
 444                unsigned long start = biosmap->addr;
 445                unsigned long size = biosmap->size;
 446                unsigned long end = start + size;
 447                unsigned long type = biosmap->type;
 448
 449                /* Overflow in 64 bits? Ignore the memory map. */
 450                if (start > end)
 451                        return -1;
 452
 453                /*
 454                 * Some BIOSes claim RAM in the 640k - 1M region.
 455                 * Not right. Fix it up.
 456                 * 
 457                 * This should be removed on Hammer which is supposed to not
 458                 * have non e820 covered ISA mappings there, but I had some strange
 459                 * problems so it stays for now.  -AK
 460                 */
 461                if (type == E820_RAM) {
 462                        if (start < 0x100000ULL && end > 0xA0000ULL) {
 463                                if (start < 0xA0000ULL)
 464                                        add_memory_region(start, 0xA0000ULL-start, type);
 465                                if (end <= 0x100000ULL)
 466                                        continue;
 467                                start = 0x100000ULL;
 468                                size = end - start;
 469                        }
 470                }
 471
 472                add_memory_region(start, size, type);
 473        } while (biosmap++,--nr_map);
 474        return 0;
 475}
 476
 477void __init setup_memory_region(void)
 478{
 479        char *who = "BIOS-e820";
 480
 481        /*
 482         * Try to copy the BIOS-supplied E820-map.
 483         *
 484         * Otherwise fake a memory map; one section from 0k->640k,
 485         * the next section from 1mb->appropriate_mem_k
 486         */
 487        sanitize_e820_map(E820_MAP, &E820_MAP_NR);
 488        if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
 489                unsigned long mem_size;
 490
 491                /* compare results from other methods and take the greater */
 492                if (ALT_MEM_K < EXT_MEM_K) {
 493                        mem_size = EXT_MEM_K;
 494                        who = "BIOS-88";
 495                } else {
 496                        mem_size = ALT_MEM_K;
 497                        who = "BIOS-e801";
 498                }
 499
 500                e820.nr_map = 0;
 501                add_memory_region(0, LOWMEMSIZE(), E820_RAM);
 502                add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
 503        }
 504        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
 505        e820_print_map(who);
 506}
 507
 508void __init parse_memopt(char *p, char **from) 
 509{ 
 510        /*
 511         * mem=XXX[kKmM] limits kernel memory to XXX+1MB
 512         *
 513         * It would be more logical to count from 0 instead of from
 514         * HIGH_MEMORY, but we keep that for now for i386 compatibility. 
 515         *      
 516         * No support for custom mapping like i386.  The reason is
 517         * that we need to read the e820 map anyways to handle the
 518         * ACPI mappings in the direct map.  Also on x86-64 there
 519         * should be always a good e820 map. This is only an upper
 520         * limit, you cannot force usage of memory not in e820.
 521         *
 522         * -AK
 523                         */
 524        end_user_pfn = memparse(p, from) + HIGH_MEMORY;
 525        end_user_pfn >>= PAGE_SHIFT;    
 526} 
 527
 528
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.