linux-old/arch/x86_64/mm/init.c
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002  Andi Kleen <ak@suse.de>
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/blk.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/pda.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>

mmu_gather_t mmu_gathers[NR_CPUS];

static unsigned long totalram_pages;

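/*
 * Shrink the per-CPU page table quicklists. If the cache has grown past
 * 'high', free pgd/pmd/pte pages from the quicklists until it drops back
 * to 'low'. Returns the number of pages freed.
 */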
int do_check_pgt_cache(int low, int high)
{
        int freed = 0;
        if(read_pda(pgtable_cache_sz) > high) {
                do {
                        if (read_pda(pgd_quick)) {
                                pgd_free_slow(pgd_alloc_one_fast());
                                freed++;
                        }
                        if (read_pda(pmd_quick)) {
                                pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
                                freed++;
                        }
                        if (read_pda(pte_quick)) {
                                pte_free_slow(pte_alloc_one_fast(NULL, 0));
                                freed++;
                        }
                } while(read_pda(pgtable_cache_sz) > low);
        }
        return freed;
}

#ifndef CONFIG_DISCONTIGMEM
/*
 * NOTE: pagetable_init allocates all the fixmap page tables contiguously
 * in physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        int i, total = 0, reserved = 0;
        int shared = 0, cached = 0;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
        i = max_mapnr;
        while (i-- > 0) {
                total++;
                if (PageReserved(mem_map+i))
                        reserved++;
                else if (PageSwapCache(mem_map+i))
                        cached++;
                else if (page_count(mem_map+i))
                        shared += page_count(mem_map+i) - 1;
        }
        printk("%d pages of RAM\n", total);
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
        printk("%d pages swap cached\n",cached);
        printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
        show_buffers();
}
#endif

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

int after_bootmem;

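/*
 * Allocate a page for a kernel page table. Once bootmem has been torn
 * down this uses the normal page allocator, before that the bootmem
 * allocator.
 */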
static void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_free_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_low_pages(PAGE_SIZE);
        if (!ptr)
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
        return ptr;
}

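/*
 * Map a single page: point the pte for 'vaddr' at physical address 'phys'
 * with protection 'prot', filling in the intermediate pgd/pmd levels as
 * needed. The PML4 entry must already exist (it is set up in head.S).
 */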
static void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot)
{
        pml4_t *level4;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;

        level4 = pml4_offset_k(vaddr);
        if (pml4_none(*level4)) {
                printk("PML4 FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pgd = level3_offset_k(level4, vaddr);
        if (pgd_none(*pgd)) {
                pmd = (pmd_t *) spp_getpage();
                set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pgd, 0)) {
                        printk("PAGETABLE BUG #01!\n");
                        return;
                }
        }
        pmd = pmd_offset(pgd, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        pte = pte_offset(pmd, vaddr);
        set_pte(pte, mk_pte_phys(phys, prot));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

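/*
 * Install a fixmap entry: map the fixed virtual address for 'idx' to the
 * given physical address.
 */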
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

extern pmd_t temp_boot_pmds[];

unsigned long __initdata table_start, table_end;

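/*
 * Temporary 2MB mappings used by alloc_low_page() to reach freshly
 * allocated page table pages before the permanent direct mapping of all
 * physical memory exists.
 */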
static  struct temp_map {
        pmd_t *pmd;
        void  *address;
        int    allocated;
} temp_mappings[] __initdata = {
        { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
        { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
        {}
};

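/*
 * Hand out the next free page for the early page tables (tracked through
 * table_end) and map it through one of the temporary slots so it can be
 * written to. Returns the mapped virtual address; *index identifies the
 * temporary slot used and *phys the page's physical address.
 */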
static __init void *alloc_low_page(int *index, unsigned long *phys)
{
        struct temp_map *ti;
        int i;
        unsigned long pfn = table_end++, paddr;
        void *adr;

        if (table_end >= end_pfn_map)
                panic("alloc_low_page: ran out of page mappings");
        for (i = 0; temp_mappings[i].allocated; i++) {
                if (!temp_mappings[i].pmd)
                        panic("alloc_low_page: ran out of temp mappings");
        }
        ti = &temp_mappings[i];
        paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
        set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
        ti->allocated = 1;
        __flush_tlb();
        adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
        *index = i;
        *phys  = pfn * PAGE_SIZE;
        return adr;
}

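/*
 * Release a temporary mapping set up by alloc_low_page().
 */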
static __init void unmap_low_page(int i)
{
        struct temp_map *ti = &temp_mappings[i];
        set_pmd(ti->pmd, __pmd(0));
        ti->allocated = 0;
}

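/*
 * Fill one pgd page with the direct mapping for the physical range
 * [address, end): allocate a pmd page for each pgd slot in the range and
 * cover it with 2MB PSE mappings, clearing the entries for ranges that
 * are not covered by the e820 map.
 */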
static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
{
        long i, j;

        i = pgd_index(address);
        pgd = pgd + i;
        for (; i < PTRS_PER_PGD; pgd++, i++) {
                int map;
                unsigned long paddr, pmd_phys;
                pmd_t *pmd;

                paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
                if (paddr >= end) {
                        for (; i < PTRS_PER_PGD; i++, pgd++)
                                set_pgd(pgd, __pgd(0));
                        break;
                }

                if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
                        set_pgd(pgd, __pgd(0));
                        continue;
                }

                pmd = alloc_low_page(&map, &pmd_phys);
                set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
                for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
                        unsigned long pe;

                        if (paddr >= end) {
                                for (; j < PTRS_PER_PMD; j++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                                break;
                        }
                        pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
                        pe &= __supported_pte_mask;
                        set_pmd(pmd, __pmd(pe));
                }
                unmap_low_page(map);
        }
        __flush_tlb();
}

/* Set up the direct mapping of physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and takes its pages directly from
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(void)
{
        unsigned long adr;
        unsigned long end;
        unsigned long next;
        unsigned long pgds, pmds, tables;

        end = end_pfn_map << PAGE_SHIFT;

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped.  Unfortunately this is done currently before the nodes are
         * discovered.
         */

        pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);

        /* Direct mapping must currently fit below the kernel in the first MB.
           This is because we have no way to tell the later passes not to reuse
           the memory until bootmem is initialised. */
        /* Should limit MAXMEM for this */
        table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        end += __PAGE_OFFSET; /* turn virtual */

        for (adr = PAGE_OFFSET; adr < end; adr = next) {
                int map;
                unsigned long pgd_phys;
                pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
                next = adr + PML4_SIZE;
                if (next > end)
                        next = end;

                phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
                set_pml4(init_level4_pgt + pml4_index(adr),
                         mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
                unmap_low_page(map);
        }
        asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
        __flush_tlb_all();
        printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
               table_start<<PAGE_SHIFT,
               table_end<<PAGE_SHIFT);
}

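/*
 * Remove the identity mapping of low memory (PML4 entry 0) from every
 * CPU's boot page table once it is no longer needed.
 */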
void __init zap_low_mappings (void)
{
        int i;
        for (i = 0; i < NR_CPUS; i++) {
                if (cpu_pda[i].level4_pgt)
                        cpu_pda[i].level4_pgt[0] = 0;
        }

        flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
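/*
 * Set up the zone sizes (ZONE_DMA below MAX_DMA_ADDRESS, the rest as
 * ZONE_NORMAL) and initialise the free lists.
 */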
void __init paging_init(void)
{
        unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
        unsigned int max_dma;

        max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
        if (end_pfn < max_dma)
                zones_size[ZONE_DMA] = end_pfn;
        else {
                zones_size[ZONE_DMA] = max_dma;
                zones_size[ZONE_NORMAL] = end_pfn - max_dma;
        }
        free_area_init(zones_size);
}

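/*
 * Check the e820 map to see whether a page frame lies in a region the
 * BIOS reported as usable RAM.
 */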
static inline int page_is_ram (unsigned long pagenr)
{
        int i;

        for (i = 0; i < e820.nr_map; i++) {
                unsigned long addr, end;

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
                 *      are not. Notably the 640->1Mb area. We need a sanity
                 *      check here.
                 */
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if  ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}
#endif

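/*
 * Hand all usable low memory over to the page allocator, count the
 * reserved pages and print the memory banner.
 */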
void __init mem_init(void)
{
        unsigned long codesize, reservedpages, datasize, initsize;
        unsigned long tmp;

        max_mapnr = end_pfn;
        num_physpages = end_pfn; /* XXX not true because of holes */
        high_memory = (void *) __va(end_pfn << PAGE_SHIFT);

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
        totalram_pages += numa_free_all_bootmem();
        tmp = 0;
        /* should count reserved pages here for all nodes */
#else
        if (!mem_map) BUG();

        totalram_pages += free_all_bootmem();

        for (tmp = 0; tmp < end_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
                        reservedpages++;
#endif

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                max_mapnr << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pmd_t *pmd;

                if (!pgd || pgd_none(*pgd))
                        continue;
                pmd = pmd_offset(pgd, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
                "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

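/*
 * Free the pages holding the __init sections back to the page allocator
 * once initialisation has finished.
 */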
void free_initmem(void)
{
        void *addr;

        addr = (&__init_begin);
        for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
#ifdef CONFIG_INIT_DEBUG
                memset((void *)((unsigned long)addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
#endif
                free_page((unsigned long)addr);
                totalram_pages++;
        }
        printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
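/*
 * Free the pages that held the initrd image, unless the range starts
 * below the end of the kernel image.
 */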
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < (unsigned long)&_end)
                return;
        printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif

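/*
 * Fill in the memory statistics reported by sysinfo().
 */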
void si_meminfo(struct sysinfo *val)
{
        val->totalram = totalram_pages;
        val->sharedram = 0;
        val->freeram = nr_free_pages();
        val->bufferram = atomic_read(&buffermem_pages);
        val->totalhigh = 0;
        val->freehigh = nr_free_highpages();
        val->mem_unit = PAGE_SIZE;
        return;
}

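/*
 * Bootmem reserve/free wrappers that pick the right node on NUMA
 * (DISCONTIGMEM) kernels.
 */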
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
        reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
}


void free_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_DISCONTIGMEM
        free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
        free_bootmem(phys, len);
#endif
}
