linux-old/arch/i386/mm/init.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/i386/mm/init.c
   3 *
   4 *  Copyright (C) 1995  Linus Torvalds
   5 *
   6 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   7 */
   8
   9#include <linux/config.h>
  10#include <linux/signal.h>
  11#include <linux/sched.h>
  12#include <linux/kernel.h>
  13#include <linux/errno.h>
  14#include <linux/string.h>
  15#include <linux/types.h>
  16#include <linux/ptrace.h>
  17#include <linux/mman.h>
  18#include <linux/mm.h>
  19#include <linux/swap.h>
  20#include <linux/smp.h>
  21#include <linux/init.h>
  22#ifdef CONFIG_BLK_DEV_INITRD
  23#include <linux/blk.h>
  24#endif
  25#include <linux/highmem.h>
  26#include <linux/pagemap.h>
  27#include <linux/bootmem.h>
  28
  29#include <asm/processor.h>
  30#include <asm/system.h>
  31#include <asm/uaccess.h>
  32#include <asm/pgtable.h>
  33#include <asm/pgalloc.h>
  34#include <asm/dma.h>
  35#include <asm/fixmap.h>
  36#include <asm/e820.h>
  37#include <asm/apic.h>
  38
/* Page-frame bounds of high memory; mem_init() walks [highstart_pfn, highend_pfn). */
unsigned long highstart_pfn, highend_pfn;
/* Running count of RAM pages handed to the allocator; reported by si_meminfo(). */
static unsigned long totalram_pages = 0;
/* Number of high-memory pages freed in mem_init(); reported by si_meminfo(). */
static unsigned long totalhigh_pages = 0;
  42
  43/*
  44 * BAD_PAGE is the page that is used for page faults when linux
  45 * is out-of-memory. Older versions of linux just did a
  46 * do_exit(), but using this instead means there is less risk
  47 * for a process dying in kernel mode, possibly leaving an inode
  48 * unused etc..
  49 *
  50 * BAD_PAGETABLE is the accompanying page-table: it is initialized
  51 * to point to BAD_PAGE entries.
  52 *
  53 * ZERO_PAGE is a special page that is used for zero-initialized
  54 * data and COW.
  55 */
  56
  57/*
  58 * These are allocated in head.S so that we get proper page alignment.
  59 * If you change the size of these then change head.S as well.
  60 */
/* Page that every entry of empty_bad_pte_table is pointed at. */
extern char empty_bad_page[PAGE_SIZE];
#if CONFIG_X86_PAE
/* PAE only: pmd table whose entries all reference empty_bad_pte_table. */
extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
#endif
/* pte table installed in a pmd when a page table is missing or corrupt. */
extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
  66
  67/*
  68 * We init them before every return and make them writable-shared.
  69 * This guarantees we get out of the kernel in some more or less sane
  70 * way.
  71 */
  72#if CONFIG_X86_PAE
  73static pmd_t * get_bad_pmd_table(void)
  74{
  75        pmd_t v;
  76        int i;
  77
  78        set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table)));
  79
  80        for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
  81                empty_bad_pmd_table[i] = v;
  82
  83        return empty_bad_pmd_table;
  84}
  85#endif
  86
  87static pte_t * get_bad_pte_table(void)
  88{
  89        pte_t v;
  90        int i;
  91
  92        v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
  93
  94        for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
  95                empty_bad_pte_table[i] = v;
  96
  97        return empty_bad_pte_table;
  98}
  99
 100
 101
 102void __handle_bad_pmd(pmd_t *pmd)
 103{
 104        pmd_ERROR(*pmd);
 105        set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
 106}
 107
 108void __handle_bad_pmd_kernel(pmd_t *pmd)
 109{
 110        pmd_ERROR(*pmd);
 111        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
 112}
 113
 114pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
 115{
 116        pte_t *pte;
 117
 118        pte = (pte_t *) __get_free_page(GFP_KERNEL);
 119        if (pmd_none(*pmd)) {
 120                if (pte) {
 121                        clear_page(pte);
 122                        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
 123                        return pte + offset;
 124                }
 125                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
 126                return NULL;
 127        }
 128        free_page((unsigned long)pte);
 129        if (pmd_bad(*pmd)) {
 130                __handle_bad_pmd_kernel(pmd);
 131                return NULL;
 132        }
 133        return (pte_t *) pmd_page(*pmd) + offset;
 134}
 135
 136pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
 137{
 138        unsigned long pte;
 139
 140        pte = (unsigned long) __get_free_page(GFP_KERNEL);
 141        if (pmd_none(*pmd)) {
 142                if (pte) {
 143                        clear_page((void *)pte);
 144                        set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
 145                        return (pte_t *)pte + offset;
 146                }
 147                set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
 148                return NULL;
 149        }
 150        free_page(pte);
 151        if (pmd_bad(*pmd)) {
 152                __handle_bad_pmd(pmd);
 153                return NULL;
 154        }
 155        return (pte_t *) pmd_page(*pmd) + offset;
 156}
 157
 158int do_check_pgt_cache(int low, int high)
 159{
 160        int freed = 0;
 161        if(pgtable_cache_size > high) {
 162                do {
 163                        if(pgd_quicklist)
 164                                free_pgd_slow(get_pgd_fast()), freed++;
 165                        if(pmd_quicklist)
 166                                free_pmd_slow(get_pmd_fast()), freed++;
 167                        if(pte_quicklist)
 168                                free_pte_slow(get_pte_fast()), freed++;
 169                } while(pgtable_cache_size > low);
 170        }
 171        return freed;
 172}
 173
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
 179
 180#if CONFIG_HIGHMEM
/* Pointer to the pte of the first kmap fixmap slot; filled in by kmap_init(). */
pte_t *kmap_pte;
/* Protection value stored by kmap_init() (PAGE_KERNEL). */
pgprot_t kmap_prot;

/* Walk the kernel page tables (pgd -> pmd -> pte) for vaddr and yield its pte. */
#define kmap_get_fixmap_pte(vaddr)                                      \
        pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
 186
 187void __init kmap_init(void)
 188{
 189        unsigned long kmap_vstart;
 190
 191        /* cache the first kmap pte */
 192        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 193        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
 194
 195        kmap_prot = PAGE_KERNEL;
 196}
 197#endif /* CONFIG_HIGHMEM */
 198
 199void show_mem(void)
 200{
 201        int i,free = 0, total = 0, reserved = 0;
 202        int shared = 0, cached = 0;
 203        int highmem = 0;
 204
 205        printk("Mem-info:\n");
 206        show_free_areas();
 207        printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 208        i = max_mapnr;
 209        while (i-- > 0) {
 210                total++;
 211                if (PageHighMem(mem_map+i))
 212                        highmem++;
 213                if (PageReserved(mem_map+i))
 214                        reserved++;
 215                else if (PageSwapCache(mem_map+i))
 216                        cached++;
 217                else if (!page_count(mem_map+i))
 218                        free++;
 219                else
 220                        shared += page_count(mem_map+i) - 1;
 221        }
 222        printk("%d pages of RAM\n", total);
 223        printk("%d pages of HIGHMEM\n",highmem);
 224        printk("%d reserved pages\n",reserved);
 225        printk("%d pages shared\n",shared);
 226        printk("%d pages swap cached\n",cached);
 227        printk("%ld pages in page table cache\n",pgtable_cache_size);
 228        show_buffers();
 229}
 230
 231/* References to section boundaries */
 232
/* Section-boundary symbols; only their addresses are taken (mem_init() sizes code and data from them). */
extern char _text, _etext, _edata, __bss_start, _end;
/* Start and end of the init section that free_initmem() releases. */
extern char __init_begin, __init_end;
 235
 236static inline void set_pte_phys (unsigned long vaddr,
 237                        unsigned long phys, pgprot_t flags)
 238{
 239        pgprot_t prot;
 240        pgd_t *pgd;
 241        pmd_t *pmd;
 242        pte_t *pte;
 243
 244        pgd = swapper_pg_dir + __pgd_offset(vaddr);
 245        pmd = pmd_offset(pgd, vaddr);
 246        pte = pte_offset(pmd, vaddr);
 247        pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
 248        set_pte(pte, mk_pte_phys(phys, prot));
 249
 250        /*
 251         * It's enough to flush this one mapping.
 252         * (PGE mappings get flushed as well)
 253         */
 254        __flush_tlb_one(vaddr);
 255}
 256
 257void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 258{
 259        unsigned long address = __fix_to_virt(idx);
 260
 261        if (idx >= __end_of_fixed_addresses) {
 262                printk("Invalid __set_fixmap\n");
 263                return;
 264        }
 265        set_pte_phys(address, phys, flags);
 266}
 267
 268static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 269{
 270        pgd_t *pgd;
 271        pmd_t *pmd;
 272        pte_t *pte;
 273        int i, j;
 274
 275        i = __pgd_offset(start);
 276        j = __pmd_offset(start);
 277        pgd = pgd_base + i;
 278
 279        for ( ; (i < PTRS_PER_PGD) && (start != end); pgd++, i++) {
 280#if CONFIG_X86_PAE
 281                if (pgd_none(*pgd)) {
 282                        pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 283                        set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
 284                        if (pmd != pmd_offset(pgd, start))
 285                                BUG();
 286                }
 287                pmd = pmd_offset(pgd, start);
 288#else
 289                pmd = (pmd_t *)pgd;
 290#endif
 291                for (; (j < PTRS_PER_PMD) && start; pmd++, j++) {
 292                        if (pmd_none(*pmd)) {
 293                                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 294                                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
 295                                if (pte != pte_offset(pmd, 0))
 296                                        BUG();
 297                        }
 298                        start += PMD_SIZE;
 299                }
 300                j = 0;
 301        }
 302}
 303
/*
 * Build the kernel page tables in swapper_pg_dir.
 *
 * Low memory, from PAGE_OFFSET up to the address of pfn max_low_pfn, is
 * mapped first: with large (_PAGE_PSE) pmd entries when the CPU has PSE,
 * otherwise with pte pages taken from bootmem and filled with PAGE_KERNEL
 * entries.  Afterwards the page-table structure for the fixmap area and,
 * with CONFIG_HIGHMEM, for the permanent kmap area is created through
 * fixrange_init().
 */
static void __init pagetable_init(void)
{
        pgd_t *pgd, *pgd_base;
        pmd_t *pmd;
        pte_t *pte;
        int i, j, k;
        unsigned long vaddr, end;

        /* last virtual byte address of low memory */
        end = (unsigned long)__va(max_low_pfn*PAGE_SIZE) - 1;

        i = __pgd_offset(PAGE_OFFSET);
        pgd_base = swapper_pg_dir;
        pgd = pgd_base + i;

        for (; i < PTRS_PER_PGD; pgd++, i++) {
                vaddr = i*PGDIR_SIZE;
                if (vaddr >= end)
                        break;
#if CONFIG_X86_PAE
                pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                /* NOTE(review): 0x1 is presumably the present bit - confirm */
                set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
#else
                /* without PAE the pgd entry itself is used as the pmd */
                pmd = (pmd_t *)pgd;
#endif
                if (pmd != pmd_offset(pgd, 0))
                        BUG();
                for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
                        vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
                        if (vaddr >= end)
                                break;
                        if (cpu_has_pse) {
                                unsigned long __pe;

                                /*
                                 * Large-page path: one pmd entry maps the
                                 * whole PMD_SIZE region, no pte page needed.
                                 * wp_works_ok is set to 1 here, which makes
                                 * mem_init() skip test_wp_bit().
                                 */
                                set_in_cr4(X86_CR4_PSE);
                                boot_cpu_data.wp_works_ok = 1;
                                __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
                                /* Make it "global" too if supported */
                                if (cpu_has_pge) {
                                        set_in_cr4(X86_CR4_PGE);
                                        __pe += _PAGE_GLOBAL;
                                }
                                set_pmd(pmd, __pmd(__pe));
                                continue;
                        }

                        /* No PSE: allocate a pte page and map page by page. */
                        pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));

                        if (pte != pte_offset(pmd, 0))
                                BUG();

                        for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
                                if (vaddr >= end)
                                        break;
                                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
                        }
                }
        }

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        /* end == 0: walk until the address wraps past the top of memory */
        fixrange_init(vaddr, 0, pgd_base);

#if CONFIG_HIGHMEM
        /*
         * Permanent kmaps:
         */
        vaddr = PKMAP_BASE;
        fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        /* remember the pte that maps PKMAP_BASE */
        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        pmd = pmd_offset(pgd, vaddr);
        pte = pte_offset(pmd, vaddr);
        pkmap_page_table = pte;
#endif

#if CONFIG_X86_PAE
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
         * case we already have these mappings through head.S.
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
        pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
#endif
}
 395
 396void __init zap_low_mappings (void)
 397{
 398        int i;
 399        /*
 400         * Zap initial low-memory mappings.
 401         *
 402         * Note that "pgd_clear()" doesn't do it for
 403         * us in this case, because pgd_clear() is a
 404         * no-op in the 2-level case (pmd_clear() is
 405         * the thing that clears the page-tables in
 406         * that case).
 407         */
 408        for (i = 0; i < USER_PTRS_PER_PGD; i++)
 409#if CONFIG_X86_PAE
 410                pgd_clear(swapper_pg_dir+i);
 411#else
 412                set_pgd(swapper_pg_dir+i, __pgd(0));
 413#endif
 414        flush_tlb_all();
 415}
 416
/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
 424void __init paging_init(void)
 425{
 426        pagetable_init();
 427
 428        __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));
 429
 430#if CONFIG_X86_PAE
 431        /*
 432         * We will bail out later - printk doesnt work right now so
 433         * the user would just see a hanging kernel.
 434         */
 435        if (cpu_has_pae)
 436                set_in_cr4(X86_CR4_PAE);
 437#endif
 438
 439        __flush_tlb_all();
 440
 441#ifdef CONFIG_HIGHMEM
 442        kmap_init();
 443#endif
 444        {
 445                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
 446                unsigned int max_dma, high, low;
 447
 448                max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 449                low = max_low_pfn;
 450                high = highend_pfn;
 451
 452                if (low < max_dma)
 453                        zones_size[ZONE_DMA] = low;
 454                else {
 455                        zones_size[ZONE_DMA] = max_dma;
 456                        zones_size[ZONE_NORMAL] = low - max_dma;
 457#ifdef CONFIG_HIGHMEM
 458                        zones_size[ZONE_HIGHMEM] = high - low;
 459#endif
 460                }
 461                free_area_init(zones_size);
 462        }
 463        return;
 464}
 465
 466/*
 467 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 468 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 469 * before and after the test are here to work-around some nasty CPU bugs.
 470 */
 471
/*
 * Probe whether a supervisor-mode write to a read-only page is refused.
 *
 * The pte for PAGE_OFFSET is temporarily replaced by a read-only mapping
 * of physical page 0, one byte is read from it and written back, and the
 * original pte is restored.  The verdict is taken from
 * boot_cpu_data.wp_works_ok afterwards.
 *
 * NOTE(review): nothing in this function sets wp_works_ok on success;
 * presumably the page-fault handler does that when the write below traps
 * (which would also explain why the success branch prints only ".\n") -
 * confirm against the fault handler.
 */
void __init test_wp_bit(void)
{
/*
 * Ok, all PAE-capable CPUs are definitely handling the WP bit right.
 */
        const unsigned long vaddr = PAGE_OFFSET;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte, old_pte;
        char tmp_reg;

        printk("Checking if this processor honours the WP bit even in supervisor mode... ");

        /* locate the pte for PAGE_OFFSET and make it read-only */
        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        pmd = pmd_offset(pgd, vaddr);
        pte = pte_offset(pmd, vaddr);
        old_pte = *pte;
        *pte = mk_pte_phys(0, PAGE_READONLY);
        local_flush_tlb();

        /* read one byte from vaddr into a register, then store it back */
        __asm__ __volatile__(
                "jmp 1f; 1:\n"
                "movb %0,%1\n"
                "movb %1,%0\n"
                "jmp 1f; 1:\n"
                :"=m" (*(char *) vaddr),
                 "=q" (tmp_reg)
                :/* no inputs */
                :"memory");

        /* put the original mapping back */
        *pte = old_pte;
        local_flush_tlb();

        /* still negative: nobody recorded a result, treat WP as broken */
        if (boot_cpu_data.wp_works_ok < 0) {
                boot_cpu_data.wp_works_ok = 0;
                printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
                panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
        } else
                printk(".\n");
}
 514
 515static inline int page_is_ram (unsigned long pagenr)
 516{
 517        int i;
 518
 519        for (i = 0; i < e820.nr_map; i++) {
 520                unsigned long addr, end;
 521
 522                if (e820.map[i].type != E820_RAM)       /* not usable memory */
 523                        continue;
 524                /*
 525                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
 526                 *      are not. Notably the 640->1Mb area. We need a sanity
 527                 *      check here.
 528                 */
 529                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
 530                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
 531                if  ((pagenr >= addr) && (pagenr < end))
 532                        return 1;
 533        }
 534        return 0;
 535}
 536
/*
 * Finish memory initialisation: set the global page-count and
 * high_memory variables, release bootmem (and, with CONFIG_HIGHMEM, the
 * high-memory pages) to the page allocator, print the memory summary,
 * run the PAE / WP-bit checks and, on non-SMP kernels, remove the low
 * identity mappings.
 */
void __init mem_init(void)
{
        int codesize, reservedpages, datasize, initsize;
        int tmp;

        if (!mem_map)
                BUG();

#ifdef CONFIG_HIGHMEM
        highmem_start_page = mem_map + highstart_pfn;
        /* cache the highmem_mapnr */
        highmem_mapnr = highstart_pfn;
        max_mapnr = num_physpages = highend_pfn;
#else
        max_mapnr = num_physpages = max_low_pfn;
#endif
        /* first virtual address past the end of low memory */
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();

        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
                        reservedpages++;
#ifdef CONFIG_HIGHMEM
        /*
         * High-memory page frames: non-RAM frames are marked reserved,
         * RAM frames are flagged PG_highmem and freed one at a time.
         */
        for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
                struct page *page = mem_map + tmp;

                if (!page_is_ram(tmp)) {
                        SetPageReserved(page);
                        continue;
                }
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                /* count is set to 1 so that __free_page() releases the page */
                atomic_set(&page->count, 1);
                __free_page(page);
                totalhigh_pages++;
        }
        totalram_pages += totalhigh_pages;
#endif
        /* section sizes in bytes, taken from the section-boundary symbols */
        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                max_mapnr << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10,
                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );

#if CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        /* a negative value means the WP behaviour has not been determined yet */
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif

}
 616
 617void free_initmem(void)
 618{
 619        unsigned long addr;
 620
 621        addr = (unsigned long)(&__init_begin);
 622        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
 623                ClearPageReserved(mem_map + MAP_NR(addr));
 624                set_page_count(mem_map+MAP_NR(addr), 1);
 625                free_page(addr);
 626                totalram_pages++;
 627        }
 628        printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
 629}
 630
 631#ifdef CONFIG_BLK_DEV_INITRD
 632void free_initrd_mem(unsigned long start, unsigned long end)
 633{
 634        if (start < end)
 635                printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 636        for (; start < end; start += PAGE_SIZE) {
 637                ClearPageReserved(mem_map + MAP_NR(start));
 638                set_page_count(mem_map+MAP_NR(start), 1);
 639                free_page(start);
 640                totalram_pages++;
 641        }
 642}
 643#endif
 644
 645void si_meminfo(struct sysinfo *val)
 646{
 647        val->totalram = totalram_pages;
 648        val->sharedram = 0;
 649        val->freeram = nr_free_pages();
 650        val->bufferram = atomic_read(&buffermem_pages);
 651        val->totalhigh = totalhigh_pages;
 652        val->freehigh = nr_free_highpages();
 653        val->mem_unit = PAGE_SIZE;
 654        return;
 655}
 656
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.