linux-bk/arch/i386/mm/init.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/i386/mm/init.c
   3 *
   4 *  Copyright (C) 1995  Linus Torvalds
   5 *
   6 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   7 */
   8
   9#include <linux/config.h>
  10#include <linux/signal.h>
  11#include <linux/sched.h>
  12#include <linux/kernel.h>
  13#include <linux/errno.h>
  14#include <linux/string.h>
  15#include <linux/types.h>
  16#include <linux/ptrace.h>
  17#include <linux/mman.h>
  18#include <linux/mm.h>
  19#include <linux/swap.h>
  20#include <linux/smp.h>
  21#include <linux/init.h>
  22#ifdef CONFIG_BLK_DEV_INITRD
  23#include <linux/blk.h>
  24#endif
  25#include <linux/highmem.h>
  26#include <linux/pagemap.h>
  27#include <linux/bootmem.h>
  28#include <linux/slab.h>
  29
  30#include <asm/processor.h>
  31#include <asm/system.h>
  32#include <asm/uaccess.h>
  33#include <asm/pgtable.h>
  34#include <asm/pgalloc.h>
  35#include <asm/dma.h>
  36#include <asm/fixmap.h>
  37#include <asm/e820.h>
  38#include <asm/apic.h>
  39#include <asm/tlb.h>
  40#include <asm/tlbflush.h>
  41
  42mmu_gather_t mmu_gathers[NR_CPUS];
  43unsigned long highstart_pfn, highend_pfn;
  44
  45/*
  46 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  47 * physical space so we can cache the place of the first one and move
  48 * around without checking the pgd every time.
  49 */
  50
  51#if CONFIG_HIGHMEM
  52pte_t *kmap_pte;
  53pgprot_t kmap_prot;
  54
  55#define kmap_get_fixmap_pte(vaddr)                                      \
  56        pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
  57
  58void __init kmap_init(void)
  59{
  60        unsigned long kmap_vstart;
  61
  62        /* cache the first kmap pte */
  63        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
  64        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
  65
  66        kmap_prot = PAGE_KERNEL;
  67}
  68#endif /* CONFIG_HIGHMEM */
  69
  70void show_mem(void)
  71{
  72        int i, total = 0, reserved = 0;
  73        int shared = 0, cached = 0;
  74        int highmem = 0;
  75
  76        printk("Mem-info:\n");
  77        show_free_areas();
  78        printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
  79        i = max_mapnr;
  80        while (i-- > 0) {
  81                total++;
  82                if (PageHighMem(mem_map+i))
  83                        highmem++;
  84                if (PageReserved(mem_map+i))
  85                        reserved++;
  86                else if (PageSwapCache(mem_map+i))
  87                        cached++;
  88                else if (page_count(mem_map+i))
  89                        shared += page_count(mem_map+i) - 1;
  90        }
  91        printk("%d pages of RAM\n", total);
  92        printk("%d pages of HIGHMEM\n",highmem);
  93        printk("%d reserved pages\n",reserved);
  94        printk("%d pages shared\n",shared);
  95        printk("%d pages swap cached\n",cached);
  96}
  97
  98/* References to section boundaries */
  99
 100extern char _text, _etext, _edata, __bss_start, _end;
 101extern char __init_begin, __init_end;
 102
 103static inline void set_pte_phys (unsigned long vaddr,
 104                        unsigned long phys, pgprot_t flags)
 105{
 106        pgd_t *pgd;
 107        pmd_t *pmd;
 108        pte_t *pte;
 109
 110        pgd = swapper_pg_dir + __pgd_offset(vaddr);
 111        if (pgd_none(*pgd)) {
 112                printk("PAE BUG #00!\n");
 113                return;
 114        }
 115        pmd = pmd_offset(pgd, vaddr);
 116        if (pmd_none(*pmd)) {
 117                printk("PAE BUG #01!\n");
 118                return;
 119        }
 120        pte = pte_offset_kernel(pmd, vaddr);
 121        /* <phys,flags> stored as-is, to permit clearing entries */
 122        set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 123
 124        /*
 125         * It's enough to flush this one mapping.
 126         * (PGE mappings get flushed as well)
 127         */
 128        __flush_tlb_one(vaddr);
 129}
 130
 131void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 132{
 133        unsigned long address = __fix_to_virt(idx);
 134
 135        if (idx >= __end_of_fixed_addresses) {
 136                printk("Invalid __set_fixmap\n");
 137                return;
 138        }
 139        set_pte_phys(address, phys, flags);
 140}
 141
 142static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 143{
 144        pgd_t *pgd;
 145        pmd_t *pmd;
 146        pte_t *pte;
 147        int i, j;
 148        unsigned long vaddr;
 149
 150        vaddr = start;
 151        i = __pgd_offset(vaddr);
 152        j = __pmd_offset(vaddr);
 153        pgd = pgd_base + i;
 154
 155        for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
 156#if CONFIG_X86_PAE
 157                if (pgd_none(*pgd)) {
 158                        pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 159                        set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
 160                        if (pmd != pmd_offset(pgd, 0))
 161                                printk("PAE BUG #02!\n");
 162                }
 163                pmd = pmd_offset(pgd, vaddr);
 164#else
 165                pmd = (pmd_t *)pgd;
 166#endif
 167                for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
 168                        if (pmd_none(*pmd)) {
 169                                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 170                                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
 171                                if (pte != pte_offset_kernel(pmd, 0))
 172                                        BUG();
 173                        }
 174                        vaddr += PMD_SIZE;
 175                }
 176                j = 0;
 177        }
 178}
 179
 180unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
 181
 182static void __init pagetable_init (void)
 183{
 184        unsigned long vaddr, pfn;
 185        pgd_t *pgd, *pgd_base;
 186        int i, j, k;
 187        pmd_t *pmd;
 188        pte_t *pte, *pte_base;
 189
 190        pgd_base = swapper_pg_dir;
 191#if CONFIG_X86_PAE
 192        for (i = 0; i < PTRS_PER_PGD; i++)
 193                set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
 194#endif
 195        if (cpu_has_pse) {
 196                set_in_cr4(X86_CR4_PSE);
 197        }
 198        if (cpu_has_pge) {
 199                set_in_cr4(X86_CR4_PGE);
 200                __PAGE_KERNEL |= _PAGE_GLOBAL;
 201        }
 202
 203        i = __pgd_offset(PAGE_OFFSET);
 204        pfn = 0;
 205        pgd = pgd_base + i;
 206
 207        for (; i < PTRS_PER_PGD && pfn < max_low_pfn; pgd++, i++) {
 208#if CONFIG_X86_PAE
 209                pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 210                set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
 211#else
 212                pmd = (pmd_t *) pgd;
 213#endif
 214                for (j = 0; j < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, j++) {
 215                        if (cpu_has_pse) {
 216                                set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
 217                                pfn += PTRS_PER_PTE;
 218                        } else {
 219                                pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 220
 221                                for (k = 0; k < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, k++)
 222                                        set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
 223
 224                                set_pmd(pmd, __pmd(__pa(pte_base) | _KERNPG_TABLE));
 225                        }
 226                }
 227        }
 228
 229        /*
 230         * Fixed mappings, only the page table structure has to be
 231         * created - mappings will be set by set_fixmap():
 232         */
 233        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
 234        fixrange_init(vaddr, 0, pgd_base);
 235
 236#if CONFIG_HIGHMEM
 237        /*
 238         * Permanent kmaps:
 239         */
 240        vaddr = PKMAP_BASE;
 241        fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 242
 243        pgd = swapper_pg_dir + __pgd_offset(vaddr);
 244        pmd = pmd_offset(pgd, vaddr);
 245        pte = pte_offset_kernel(pmd, vaddr);
 246        pkmap_page_table = pte;
 247#endif
 248
 249#if CONFIG_X86_PAE
 250        /*
 251         * Add low memory identity-mappings - SMP needs it when
 252         * starting up on an AP from real-mode. In the non-PAE
 253         * case we already have these mappings through head.S.
 254         * All user-space mappings are explicitly cleared after
 255         * SMP startup.
 256         */
 257        pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
 258#endif
 259}
 260
 261void __init zap_low_mappings (void)
 262{
 263        int i;
 264        /*
 265         * Zap initial low-memory mappings.
 266         *
 267         * Note that "pgd_clear()" doesn't do it for
 268         * us, because pgd_clear() is a no-op on i386.
 269         */
 270        for (i = 0; i < USER_PTRS_PER_PGD; i++)
 271#if CONFIG_X86_PAE
 272                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 273#else
 274                set_pgd(swapper_pg_dir+i, __pgd(0));
 275#endif
 276        flush_tlb_all();
 277}
 278
 279/*
 280 * paging_init() sets up the page tables - note that the first 8MB are
 281 * already mapped by head.S.
 282 *
 283 * This routines also unmaps the page at virtual kernel address 0, so
 284 * that we can trap those pesky NULL-reference errors in the kernel.
 285 */
 286void __init paging_init(void)
 287{
 288        pagetable_init();
 289
 290        load_cr3(swapper_pg_dir);
 291
 292#if CONFIG_X86_PAE
 293        /*
 294         * We will bail out later - printk doesn't work right now so
 295         * the user would just see a hanging kernel.
 296         */
 297        if (cpu_has_pae)
 298                set_in_cr4(X86_CR4_PAE);
 299#endif
 300
 301        __flush_tlb_all();
 302
 303#ifdef CONFIG_HIGHMEM
 304        kmap_init();
 305#endif
 306        {
 307                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
 308                unsigned int max_dma, high, low;
 309
 310                max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 311                low = max_low_pfn;
 312                high = highend_pfn;
 313
 314                if (low < max_dma)
 315                        zones_size[ZONE_DMA] = low;
 316                else {
 317                        zones_size[ZONE_DMA] = max_dma;
 318                        zones_size[ZONE_NORMAL] = low - max_dma;
 319#ifdef CONFIG_HIGHMEM
 320                        zones_size[ZONE_HIGHMEM] = high - low;
 321#endif
 322                }
 323                free_area_init(zones_size);
 324        }
 325        return;
 326}
 327
 328/*
 329 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 330 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 331 * before and after the test are here to work-around some nasty CPU bugs.
 332 */
 333
 334/*
 335 * This function cannot be __init, since exceptions don't work in that
 336 * section.
 337 */
 338static int do_test_wp_bit(unsigned long vaddr);
 339
 340void __init test_wp_bit(void)
 341{
 342        const unsigned long vaddr = PAGE_OFFSET;
 343        pgd_t *pgd;
 344        pmd_t *pmd;
 345        pte_t *pte, old_pte;
 346
 347        if (cpu_has_pse) {
 348                /* Ok, all PSE-capable CPUs are definitely handling the WP bit right. */
 349                boot_cpu_data.wp_works_ok = 1;
 350                return;
 351        }
 352
 353        printk("Checking if this processor honours the WP bit even in supervisor mode... ");
 354
 355        pgd = swapper_pg_dir + __pgd_offset(vaddr);
 356        pmd = pmd_offset(pgd, vaddr);
 357        pte = pte_offset_kernel(pmd, vaddr);
 358        old_pte = *pte;
 359        *pte = pfn_pte(0, PAGE_READONLY);
 360        local_flush_tlb();
 361
 362        boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);
 363
 364        *pte = old_pte;
 365        local_flush_tlb();
 366
 367        if (!boot_cpu_data.wp_works_ok) {
 368                printk("No.\n");
 369#ifdef CONFIG_X86_WP_WORKS_OK
 370                panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
 371#endif
 372        } else {
 373                printk("Ok.\n");
 374        }
 375}
 376
 377static inline int page_is_ram (unsigned long pagenr)
 378{
 379        int i;
 380
 381        for (i = 0; i < e820.nr_map; i++) {
 382                unsigned long addr, end;
 383
 384                if (e820.map[i].type != E820_RAM)       /* not usable memory */
 385                        continue;
 386                /*
 387                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
 388                 *      are not. Notably the 640->1Mb area. We need a sanity
 389                 *      check here.
 390                 */
 391                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
 392                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
 393                if  ((pagenr >= addr) && (pagenr < end))
 394                        return 1;
 395        }
 396        return 0;
 397}
 398
 399static inline int page_kills_ppro(unsigned long pagenr)
 400{
 401        if(pagenr >= 0x70000 && pagenr <= 0x7003F)
 402                return 1;
 403        return 0;
 404}
 405        
 406void __init mem_init(void)
 407{
 408        extern int ppro_with_ram_bug(void);
 409        int codesize, reservedpages, datasize, initsize;
 410        int tmp;
 411        int bad_ppro;
 412
 413        if (!mem_map)
 414                BUG();
 415        
 416        bad_ppro = ppro_with_ram_bug();
 417
 418#ifdef CONFIG_HIGHMEM
 419        highmem_start_page = mem_map + highstart_pfn;
 420        max_mapnr = num_physpages = highend_pfn;
 421#else
 422        max_mapnr = num_physpages = max_low_pfn;
 423#endif
 424        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 425
 426        /* clear the zero-page */
 427        memset(empty_zero_page, 0, PAGE_SIZE);
 428
 429        /* this will put all low memory onto the freelists */
 430        totalram_pages += free_all_bootmem();
 431
 432        reservedpages = 0;
 433        for (tmp = 0; tmp < max_low_pfn; tmp++)
 434                /*
 435                 * Only count reserved RAM pages
 436                 */
 437                if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
 438                        reservedpages++;
 439#ifdef CONFIG_HIGHMEM
 440        for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
 441                struct page *page = mem_map + tmp;
 442
 443                if (!page_is_ram(tmp)) {
 444                        SetPageReserved(page);
 445                        continue;
 446                }
 447                if (bad_ppro && page_kills_ppro(tmp))
 448                {
 449                        SetPageReserved(page);
 450                        continue;
 451                }
 452                ClearPageReserved(page);
 453                set_bit(PG_highmem, &page->flags);
 454                atomic_set(&page->count, 1);
 455                __free_page(page);
 456                totalhigh_pages++;
 457        }
 458        totalram_pages += totalhigh_pages;
 459#endif
 460        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 461        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 462        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 463
 464        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
 465                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 466                max_mapnr << (PAGE_SHIFT-10),
 467                codesize >> 10,
 468                reservedpages << (PAGE_SHIFT-10),
 469                datasize >> 10,
 470                initsize >> 10,
 471                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
 472               );
 473
 474#if CONFIG_X86_PAE
 475        if (!cpu_has_pae)
 476                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
 477#endif
 478        if (boot_cpu_data.wp_works_ok < 0)
 479                test_wp_bit();
 480
 481        /*
 482         * Subtle. SMP is doing it's boot stuff late (because it has to
 483         * fork idle threads) - but it also needs low mappings for the
 484         * protected-mode entry to work. We zap these entries only after
 485         * the WP-bit has been tested.
 486         */
 487#ifndef CONFIG_SMP
 488        zap_low_mappings();
 489#endif
 490
 491}
 492
 493/* Put this after the callers, so that it cannot be inlined */
 494static int do_test_wp_bit(unsigned long vaddr)
 495{
 496        char tmp_reg;
 497        int flag;
 498
 499        __asm__ __volatile__(
 500                "       movb %0,%1      \n"
 501                "1:     movb %1,%0      \n"
 502                "       xorl %2,%2      \n"
 503                "2:                     \n"
 504                ".section __ex_table,\"a\"\n"
 505                "       .align 4        \n"
 506                "       .long 1b,2b     \n"
 507                ".previous              \n"
 508                :"=m" (*(char *) vaddr),
 509                 "=q" (tmp_reg),
 510                 "=r" (flag)
 511                :"2" (1)
 512                :"memory");
 513        
 514        return flag;
 515}
 516
 517void free_initmem(void)
 518{
 519        unsigned long addr;
 520
 521        addr = (unsigned long)(&__init_begin);
 522        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
 523                ClearPageReserved(virt_to_page(addr));
 524                set_page_count(virt_to_page(addr), 1);
 525                free_page(addr);
 526                totalram_pages++;
 527        }
 528        printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
 529}
 530
 531#ifdef CONFIG_BLK_DEV_INITRD
 532void free_initrd_mem(unsigned long start, unsigned long end)
 533{
 534        if (start < end)
 535                printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 536        for (; start < end; start += PAGE_SIZE) {
 537                ClearPageReserved(virt_to_page(start));
 538                set_page_count(virt_to_page(start), 1);
 539                free_page(start);
 540                totalram_pages++;
 541        }
 542}
 543#endif
 544
 545#if defined(CONFIG_X86_PAE)
 546static struct kmem_cache_s *pae_pgd_cachep;
 547
 548void __init pgtable_cache_init(void)
 549{
 550        /*
 551         * PAE pgds must be 16-byte aligned:
 552         */
 553        pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
 554                SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
 555        if (!pae_pgd_cachep)
 556                panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
 557}
 558
 559pgd_t *pgd_alloc(struct mm_struct *mm)
 560{
 561        int i;
 562        pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
 563
 564        if (pgd) {
 565                for (i = 0; i < USER_PTRS_PER_PGD; i++) {
 566                        unsigned long pmd = __get_free_page(GFP_KERNEL);
 567                        if (!pmd)
 568                                goto out_oom;
 569                        clear_page(pmd);
 570                        set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
 571                }
 572                memcpy(pgd + USER_PTRS_PER_PGD,
 573                        swapper_pg_dir + USER_PTRS_PER_PGD,
 574                        (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 575        }
 576        return pgd;
 577out_oom:
 578        for (i--; i >= 0; i--)
 579                free_page((unsigned long)__va(pgd_val(pgd[i])-1));
 580        kmem_cache_free(pae_pgd_cachep, pgd);
 581        return NULL;
 582}
 583
 584void pgd_free(pgd_t *pgd)
 585{
 586        int i;
 587
 588        for (i = 0; i < USER_PTRS_PER_PGD; i++)
 589                free_page((unsigned long)__va(pgd_val(pgd[i])-1));
 590        kmem_cache_free(pae_pgd_cachep, pgd);
 591}
 592
 593#else
 594
 595pgd_t *pgd_alloc(struct mm_struct *mm)
 596{
 597        pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
 598
 599        if (pgd) {
 600                memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
 601                memcpy(pgd + USER_PTRS_PER_PGD,
 602                        swapper_pg_dir + USER_PTRS_PER_PGD,
 603                        (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 604        }
 605        return pgd;
 606}
 607
 608void pgd_free(pgd_t *pgd)
 609{
 610        free_page((unsigned long)pgd);
 611}
 612#endif /* CONFIG_X86_PAE */
 613
 614pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 615{
 616        int count = 0;
 617        pte_t *pte;
 618   
 619        do {
 620                pte = (pte_t *) __get_free_page(GFP_KERNEL);
 621                if (pte)
 622                        clear_page(pte);
 623                else {
 624                        current->state = TASK_UNINTERRUPTIBLE;
 625                        schedule_timeout(HZ);
 626                }
 627        } while (!pte && (count++ < 10));
 628        return pte;
 629}
 630
 631struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 632{
 633        int count = 0;
 634        struct page *pte;
 635   
 636        do {
 637#if CONFIG_HIGHPTE
 638                pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
 639#else
 640                pte = alloc_pages(GFP_KERNEL, 0);
 641#endif
 642                if (pte)
 643                        clear_highpage(pte);
 644                else {
 645                        current->state = TASK_UNINTERRUPTIBLE;
 646                        schedule_timeout(HZ);
 647                }
 648        } while (!pte && (count++ < 10));
 649        return pte;
 650}
 651
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.