linux/arch/ppc64/mm/init.c
<<
>>
Prefs
   1/*
   2 *  PowerPC version 
   3 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
   4 *
   5 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
   6 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
   7 *    Copyright (C) 1996 Paul Mackerras
   8 *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
   9 *
  10 *  Derived from "arch/i386/mm/init.c"
  11 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  12 *
  13 *  Dave Engebretsen <engebret@us.ibm.com>
  14 *      Rework for PPC64 port.
  15 *
  16 *  This program is free software; you can redistribute it and/or
  17 *  modify it under the terms of the GNU General Public License
  18 *  as published by the Free Software Foundation; either version
  19 *  2 of the License, or (at your option) any later version.
  20 *
  21 */
  22
  23#include <linux/config.h>
  24#include <linux/signal.h>
  25#include <linux/sched.h>
  26#include <linux/kernel.h>
  27#include <linux/errno.h>
  28#include <linux/string.h>
  29#include <linux/types.h>
  30#include <linux/mman.h>
  31#include <linux/mm.h>
  32#include <linux/swap.h>
  33#include <linux/stddef.h>
  34#include <linux/vmalloc.h>
  35#include <linux/init.h>
  36#include <linux/delay.h>
  37#include <linux/bootmem.h>
  38#include <linux/highmem.h>
  39#include <linux/idr.h>
  40#include <linux/nodemask.h>
  41
  42#include <asm/pgalloc.h>
  43#include <asm/page.h>
  44#include <asm/abs_addr.h>
  45#include <asm/prom.h>
  46#include <asm/lmb.h>
  47#include <asm/rtas.h>
  48#include <asm/io.h>
  49#include <asm/mmu_context.h>
  50#include <asm/pgtable.h>
  51#include <asm/mmu.h>
  52#include <asm/uaccess.h>
  53#include <asm/smp.h>
  54#include <asm/machdep.h>
  55#include <asm/tlb.h>
  56#include <asm/eeh.h>
  57#include <asm/processor.h>
  58#include <asm/mmzone.h>
  59#include <asm/cputable.h>
  60#include <asm/ppcdebug.h>
  61#include <asm/sections.h>
  62#include <asm/system.h>
  63#include <asm/iommu.h>
  64#include <asm/abs_addr.h>
  65
  66int mem_init_done;	/* set to 1 at the end of mem_init(); the ioremap paths below test it */
  67unsigned long ioremap_bot = IMALLOC_BASE;	/* next address __ioremap() hands out while !mem_init_done */
  68static unsigned long phbs_io_bot = PHBS_IO_BASE;	/* next address reserve_phb_iospace() hands out */
  69
  70extern pgd_t swapper_pg_dir[];
  71extern struct task_struct *current_set[NR_CPUS];
  72
  73extern pgd_t ioremap_dir[];
  74pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
  75
  76unsigned long klimit = (unsigned long)_end;
  77
  78unsigned long _SDR1=0;
  79unsigned long _ASR=0;
  80
  81/* max amount of RAM to use */
  82unsigned long __max_memory;
  83
  84/*
   * Info on what we think the IO hole is.  Both values are filled in by
   * mm_init_ppc64() from the first gap found between lmb memory regions
   * and stay zero when no gap is found.
   */
  85unsigned long   io_hole_start;
  86unsigned long   io_hole_size;
  87
  88void show_mem(void)
  89{
  90        unsigned long total = 0, reserved = 0;
  91        unsigned long shared = 0, cached = 0;
  92        struct page *page;
  93        pg_data_t *pgdat;
  94        unsigned long i;
  95
  96        printk("Mem-info:\n");
  97        show_free_areas();
  98        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
  99        for_each_pgdat(pgdat) {
 100                for (i = 0; i < pgdat->node_spanned_pages; i++) {
 101                        page = pgdat->node_mem_map + i;
 102                        total++;
 103                        if (PageReserved(page))
 104                                reserved++;
 105                        else if (PageSwapCache(page))
 106                                cached++;
 107                        else if (page_count(page))
 108                                shared += page_count(page) - 1;
 109                }
 110        }
 111        printk("%ld pages of RAM\n", total);
 112        printk("%ld reserved pages\n", reserved);
 113        printk("%ld pages shared\n", shared);
 114        printk("%ld pages swap cached\n", cached);
 115}
 116
 117#ifdef CONFIG_PPC_ISERIES
 118
 119void __iomem *ioremap(unsigned long addr, unsigned long size)
 120{
 121        return (void __iomem *)addr;
 122}
 123
 124extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
 125                       unsigned long flags)
 126{
 127        return (void __iomem *)addr;
 128}
 129
 130void iounmap(volatile void __iomem *addr)
 131{
 132        return;
 133}
 134
 135#else
 136
 137/*
 138 * map_io_page currently only called by __ioremap
 139 * map_io_page adds an entry to the ioremap page table
 140 * and adds an entry to the HPT, possibly bolting it
 141 */
 142static void map_io_page(unsigned long ea, unsigned long pa, int flags)
 143{
 144        pgd_t *pgdp;
 145        pmd_t *pmdp;
 146        pte_t *ptep;
 147        unsigned long vsid;
 148
 149        if (mem_init_done) {
 150                spin_lock(&ioremap_mm.page_table_lock);
 151                pgdp = pgd_offset_i(ea);
 152                pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
 153                ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
 154
 155                pa = abs_to_phys(pa);
 156                set_pte(ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 157                spin_unlock(&ioremap_mm.page_table_lock);
 158        } else {
 159                unsigned long va, vpn, hash, hpteg;
 160
 161                /*
 162                 * If the mm subsystem is not fully up, we cannot create a
 163                 * linux page table entry for this mapping.  Simply bolt an
 164                 * entry in the hardware page table.
 165                 */
 166                vsid = get_kernel_vsid(ea);
 167                va = (vsid << 28) | (ea & 0xFFFFFFF);
 168                vpn = va >> PAGE_SHIFT;
 169
 170                hash = hpt_hash(vpn, 0);
 171
 172                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 173
 174                /* Panic if a pte grpup is full */
 175                if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
 176                                       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
 177                                       1, 0) == -1) {
 178                        panic("map_io_page: could not insert mapping");
 179                }
 180        }
 181}
 182
 183
 184static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
 185                            unsigned long ea, unsigned long size,
 186                            unsigned long flags)
 187{
 188        unsigned long i;
 189
 190        if ((flags & _PAGE_PRESENT) == 0)
 191                flags |= pgprot_val(PAGE_KERNEL);
 192        if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
 193                flags |= _PAGE_GUARDED;
 194
 195        for (i = 0; i < size; i += PAGE_SIZE) {
 196                map_io_page(ea+i, pa+i, flags);
 197        }
 198
 199        return (void __iomem *) (ea + (addr & ~PAGE_MASK));
 200}
 201
 202
 203void __iomem *
 204ioremap(unsigned long addr, unsigned long size)
 205{
 206        return __ioremap(addr, size, _PAGE_NO_CACHE);
 207}
 208
 209void __iomem *
 210__ioremap(unsigned long addr, unsigned long size, unsigned long flags)
 211{
 212        unsigned long pa, ea;
 213
 214        /*
 215         * Choose an address to map it to.
 216         * Once the imalloc system is running, we use it.
 217         * Before that, we map using addresses going
 218         * up from ioremap_bot.  imalloc will use
 219         * the addresses from ioremap_bot through
 220         * IMALLOC_END (0xE000001fffffffff)
 221         * 
 222         */
 223        pa = addr & PAGE_MASK;
 224        size = PAGE_ALIGN(addr + size) - pa;
 225
 226        if (size == 0)
 227                return NULL;
 228
 229        if (mem_init_done) {
 230                struct vm_struct *area;
 231                area = im_get_free_area(size);
 232                if (area == NULL)
 233                        return NULL;
 234                ea = (unsigned long)(area->addr);
 235        } else {
 236                ea = ioremap_bot;
 237                ioremap_bot += size;
 238        }
 239
 240        return __ioremap_com(addr, pa, ea, size, flags);
 241}
 242
 243#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
 244
 245int __ioremap_explicit(unsigned long pa, unsigned long ea,
 246                       unsigned long size, unsigned long flags)
 247{
 248        struct vm_struct *area;
 249        
 250        /* For now, require page-aligned values for pa, ea, and size */
 251        if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
 252            !IS_PAGE_ALIGNED(size)) {
 253                printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
 254                return 1;
 255        }
 256        
 257        if (!mem_init_done) {
 258                /* Two things to consider in this case:
 259                 * 1) No records will be kept (imalloc, etc) that the region
 260                 *    has been remapped
 261                 * 2) It won't be easy to iounmap() the region later (because
 262                 *    of 1)
 263                 */
 264                ;
 265        } else {
 266                area = im_get_area(ea, size,
 267                        IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
 268                if (area == NULL) {
 269                        /* Expected when PHB-dlpar is in play */
 270                        return 1;
 271                }
 272                if (ea != (unsigned long) area->addr) {
 273                        printk(KERN_ERR "unexpected addr return from im_get_area\n");
 274                        return 1;
 275                }
 276        }
 277        
 278        if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) {
 279                printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
 280                return 1;
 281        }
 282
 283        return 0;
 284}
 285
 286static void unmap_im_area_pte(pmd_t *pmd, unsigned long address,
 287                                  unsigned long size)
 288{
 289        unsigned long end;
 290        pte_t *pte;
 291
 292        if (pmd_none(*pmd))
 293                return;
 294        if (pmd_bad(*pmd)) {
 295                pmd_ERROR(*pmd);
 296                pmd_clear(pmd);
 297                return;
 298        }
 299
 300        pte = pte_offset_kernel(pmd, address);
 301        address &= ~PMD_MASK;
 302        end = address + size;
 303        if (end > PMD_SIZE)
 304                end = PMD_SIZE;
 305
 306        do {
 307                pte_t page;
 308                page = ptep_get_and_clear(pte);
 309                address += PAGE_SIZE;
 310                pte++;
 311                if (pte_none(page))
 312                        continue;
 313                if (pte_present(page))
 314                        continue;
 315                printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
 316        } while (address < end);
 317}
 318
 319static void unmap_im_area_pmd(pgd_t *dir, unsigned long address,
 320                                  unsigned long size)
 321{
 322        unsigned long end;
 323        pmd_t *pmd;
 324
 325        if (pgd_none(*dir))
 326                return;
 327        if (pgd_bad(*dir)) {
 328                pgd_ERROR(*dir);
 329                pgd_clear(dir);
 330                return;
 331        }
 332
 333        pmd = pmd_offset(dir, address);
 334        address &= ~PGDIR_MASK;
 335        end = address + size;
 336        if (end > PGDIR_SIZE)
 337                end = PGDIR_SIZE;
 338
 339        do {
 340                unmap_im_area_pte(pmd, address, end - address);
 341                address = (address + PMD_SIZE) & PMD_MASK;
 342                pmd++;
 343        } while (address < end);
 344}
 345
 346/*  
 347 * Unmap an IO region and remove it from imalloc'd list.
 348 * Access to IO memory should be serialized by driver.
 349 * This code is modeled after vmalloc code - unmap_vm_area()
 350 *
 351 * XXX  what about calls before mem_init_done (ie python_countermeasures())     
 352 */
 353void iounmap(volatile void __iomem *token)
 354{
 355        unsigned long address, start, end, size;
 356        struct mm_struct *mm;
 357        pgd_t *dir;
 358        void *addr;
 359
 360        if (!mem_init_done) {
 361                return;
 362        }
 363        
 364        addr = (void *) ((unsigned long __force) token & PAGE_MASK);
 365        
 366        if ((size = im_free(addr)) == 0) {
 367                return;
 368        }
 369
 370        address = (unsigned long)addr; 
 371        start = address;
 372        end = address + size;
 373
 374        mm = &ioremap_mm;
 375        spin_lock(&mm->page_table_lock);
 376
 377        dir = pgd_offset_i(address);
 378        flush_cache_vunmap(address, end);
 379        do {
 380                unmap_im_area_pmd(dir, address, end - address);
 381                address = (address + PGDIR_SIZE) & PGDIR_MASK;
 382                dir++;
 383        } while (address && (address < end));
 384        flush_tlb_kernel_range(start, end);
 385
 386        spin_unlock(&mm->page_table_lock);
 387        return;
 388}
 389
 390static int iounmap_subset_regions(unsigned long addr, unsigned long size)
 391{
 392        struct vm_struct *area;
 393
 394        /* Check whether subsets of this region exist */
 395        area = im_get_area(addr, size, IM_REGION_SUPERSET);
 396        if (area == NULL)
 397                return 1;
 398
 399        while (area) {
 400                iounmap((void __iomem *) area->addr);
 401                area = im_get_area(addr, size,
 402                                IM_REGION_SUPERSET);
 403        }
 404
 405        return 0;
 406}
 407
 408int iounmap_explicit(volatile void __iomem *start, unsigned long size)
 409{
 410        struct vm_struct *area;
 411        unsigned long addr;
 412        int rc;
 413        
 414        addr = (unsigned long __force) start & PAGE_MASK;
 415
 416        /* Verify that the region either exists or is a subset of an existing
 417         * region.  In the latter case, split the parent region to create 
 418         * the exact region 
 419         */
 420        area = im_get_area(addr, size, 
 421                            IM_REGION_EXISTS | IM_REGION_SUBSET);
 422        if (area == NULL) {
 423                /* Determine whether subset regions exist.  If so, unmap */
 424                rc = iounmap_subset_regions(addr, size);
 425                if (rc) {
 426                        printk(KERN_ERR
 427                               "%s() cannot unmap nonexistent range 0x%lx\n",
 428                                __FUNCTION__, addr);
 429                        return 1;
 430                }
 431        } else {
 432                iounmap((void __iomem *) area->addr);
 433        }
 434        /*
 435         * FIXME! This can't be right:
 436        iounmap(area->addr);
 437         * Maybe it should be "iounmap(area);"
 438         */
 439        return 0;
 440}
 441
 442#endif
 443
 444void free_initmem(void)
 445{
 446        unsigned long addr;
 447
 448        addr = (unsigned long)__init_begin;
 449        for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
 450                ClearPageReserved(virt_to_page(addr));
 451                set_page_count(virt_to_page(addr), 1);
 452                free_page(addr);
 453                totalram_pages++;
 454        }
 455        printk ("Freeing unused kernel memory: %luk freed\n",
 456                ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
 457}
 458
 459#ifdef CONFIG_BLK_DEV_INITRD
 460void free_initrd_mem(unsigned long start, unsigned long end)
 461{
 462        if (start < end)
 463                printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 464        for (; start < end; start += PAGE_SIZE) {
 465                ClearPageReserved(virt_to_page(start));
 466                set_page_count(virt_to_page(start), 1);
 467                free_page(start);
 468                totalram_pages++;
 469        }
 470}
 471#endif
 472
 473static DEFINE_SPINLOCK(mmu_context_lock);
 474static DEFINE_IDR(mmu_context_idr);
 475
 476int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 477{
 478        int index;
 479        int err;
 480
 481#ifdef CONFIG_HUGETLB_PAGE
 482        /* We leave htlb_segs as it was, but for a fork, we need to
 483         * clear the huge_pgdir. */
 484        mm->context.huge_pgdir = NULL;
 485#endif
 486
 487again:
 488        if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
 489                return -ENOMEM;
 490
 491        spin_lock(&mmu_context_lock);
 492        err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
 493        spin_unlock(&mmu_context_lock);
 494
 495        if (err == -EAGAIN)
 496                goto again;
 497        else if (err)
 498                return err;
 499
 500        if (index > MAX_CONTEXT) {
 501                idr_remove(&mmu_context_idr, index);
 502                return -ENOMEM;
 503        }
 504
 505        mm->context.id = index;
 506
 507        return 0;
 508}
 509
 510void destroy_context(struct mm_struct *mm)
 511{
 512        spin_lock(&mmu_context_lock);
 513        idr_remove(&mmu_context_idr, mm->context.id);
 514        spin_unlock(&mmu_context_lock);
 515
 516        mm->context.id = NO_CONTEXT;
 517
 518        hugetlb_mm_free_pgd(mm);
 519}
 520
 521/*
 522 * Do very early mm setup.
 523 */
 524void __init mm_init_ppc64(void)
 525{
 526#ifndef CONFIG_PPC_ISERIES
 527        unsigned long i;
 528#endif
 529
 530        ppc64_boot_msg(0x100, "MM Init");
 531
 532        /* This is the story of the IO hole... please, keep seated,
 533         * unfortunately, we are out of oxygen masks at the moment.
 534         * So we need some rough way to tell where your big IO hole
 535         * is. On pmac, it's between 2G and 4G, on POWER3, it's around
 536         * that area as well, on POWER4 we don't have one, etc...
 537         * We need that as a "hint" when sizing the TCE table on POWER3
 538         * So far, the simplest way that seem work well enough for us it
 539         * to just assume that the first discontinuity in our physical
 540         * RAM layout is the IO hole. That may not be correct in the future
 541         * (and isn't on iSeries but then we don't care ;)
 542         */
 543
 544#ifndef CONFIG_PPC_ISERIES
 545        for (i = 1; i < lmb.memory.cnt; i++) {
 546                unsigned long base, prevbase, prevsize;
 547
 548                prevbase = lmb.memory.region[i-1].physbase;
 549                prevsize = lmb.memory.region[i-1].size;
 550                base = lmb.memory.region[i].physbase;
 551                if (base > (prevbase + prevsize)) {
 552                        io_hole_start = prevbase + prevsize;
 553                        io_hole_size = base  - (prevbase + prevsize);
 554                        break;
 555                }
 556        }
 557#endif /* CONFIG_PPC_ISERIES */
 558        if (io_hole_start)
 559                printk("IO Hole assumed to be %lx -> %lx\n",
 560                       io_hole_start, io_hole_start + io_hole_size - 1);
 561
 562        ppc64_boot_msg(0x100, "MM Init Done");
 563}
 564
 565/*
 566 * This is called by /dev/mem to know if a given address has to
 567 * be mapped non-cacheable or not
 568 */
 569int page_is_ram(unsigned long pfn)
 570{
 571        int i;
 572        unsigned long paddr = (pfn << PAGE_SHIFT);
 573
 574        for (i=0; i < lmb.memory.cnt; i++) {
 575                unsigned long base;
 576
 577#ifdef CONFIG_MSCHUNKS
 578                base = lmb.memory.region[i].physbase;
 579#else
 580                base = lmb.memory.region[i].base;
 581#endif
 582                if ((paddr >= base) &&
 583                        (paddr < (base + lmb.memory.region[i].size))) {
 584                        return 1;
 585                }
 586        }
 587
 588        return 0;
 589}
 590EXPORT_SYMBOL(page_is_ram);
 591
 592/*
 593 * Initialize the bootmem system and give it all the memory we
 594 * have available.
 595 */
 596#ifndef CONFIG_DISCONTIGMEM
 597void __init do_init_bootmem(void)
 598{
 599        unsigned long i;
 600        unsigned long start, bootmap_pages;
 601        unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
 602        int boot_mapsize;
 603
 604        /*
 605         * Find an area to use for the bootmem bitmap.  Calculate the size of
 606         * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
 607         * Add 1 additional page in case the address isn't page-aligned.
 608         */
 609        bootmap_pages = bootmem_bootmap_pages(total_pages);
 610
 611        start = abs_to_phys(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE));
 612        BUG_ON(!start);
 613
 614        boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
 615
 616        max_pfn = max_low_pfn;
 617
 618        /* add all physical memory to the bootmem map. Also find the first */
 619        for (i=0; i < lmb.memory.cnt; i++) {
 620                unsigned long physbase, size;
 621
 622                physbase = lmb.memory.region[i].physbase;
 623                size = lmb.memory.region[i].size;
 624                free_bootmem(physbase, size);
 625        }
 626
 627        /* reserve the sections we're already using */
 628        for (i=0; i < lmb.reserved.cnt; i++) {
 629                unsigned long physbase = lmb.reserved.region[i].physbase;
 630                unsigned long size = lmb.reserved.region[i].size;
 631
 632                reserve_bootmem(physbase, size);
 633        }
 634}
 635
 636/*
 637 * paging_init() sets up the page tables - in fact we've already done this.
 638 */
 639void __init paging_init(void)
 640{
 641        unsigned long zones_size[MAX_NR_ZONES];
 642        unsigned long zholes_size[MAX_NR_ZONES];
 643        unsigned long total_ram = lmb_phys_mem_size();
 644        unsigned long top_of_ram = lmb_end_of_DRAM();
 645
 646        printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 647               top_of_ram, total_ram);
 648        printk(KERN_INFO "Memory hole size: %ldMB\n",
 649               (top_of_ram - total_ram) >> 20);
 650        /*
 651         * All pages are DMA-able so we put them all in the DMA zone.
 652         */
 653        memset(zones_size, 0, sizeof(zones_size));
 654        memset(zholes_size, 0, sizeof(zholes_size));
 655
 656        zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
 657        zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
 658
 659        free_area_init_node(0, &contig_page_data, zones_size,
 660                            __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
 661        mem_map = contig_page_data.node_mem_map;
 662}
 663#endif /* CONFIG_DISCONTIGMEM */
 664
 665static struct kcore_list kcore_vmem;
 666
 667static int __init setup_kcore(void)
 668{
 669        int i;
 670
 671        for (i=0; i < lmb.memory.cnt; i++) {
 672                unsigned long physbase, size;
 673                struct kcore_list *kcore_mem;
 674
 675                physbase = lmb.memory.region[i].physbase;
 676                size = lmb.memory.region[i].size;
 677
 678                /* GFP_ATOMIC to avoid might_sleep warnings during boot */
 679                kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
 680                if (!kcore_mem)
 681                        panic("mem_init: kmalloc failed\n");
 682
 683                kclist_add(kcore_mem, __va(physbase), size);
 684        }
 685
 686        kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
 687
 688        return 0;
 689}
 690module_init(setup_kcore);
 691
 692void __init mem_init(void)
 693{
 694#ifdef CONFIG_DISCONTIGMEM
 695        int nid;
 696#endif
 697        pg_data_t *pgdat;
 698        unsigned long i;
 699        struct page *page;
 700        unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
 701
 702        num_physpages = max_low_pfn;    /* RAM is assumed contiguous */
 703        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 704
 705#ifdef CONFIG_DISCONTIGMEM
 706        for_each_online_node(nid) {
 707                if (NODE_DATA(nid)->node_spanned_pages != 0) {
 708                        printk("freeing bootmem node %x\n", nid);
 709                        totalram_pages +=
 710                                free_all_bootmem_node(NODE_DATA(nid));
 711                }
 712        }
 713#else
 714        max_mapnr = num_physpages;
 715        totalram_pages += free_all_bootmem();
 716#endif
 717
 718        for_each_pgdat(pgdat) {
 719                for (i = 0; i < pgdat->node_spanned_pages; i++) {
 720                        page = pgdat->node_mem_map + i;
 721                        if (PageReserved(page))
 722                                reservedpages++;
 723                }
 724        }
 725
 726        codesize = (unsigned long)&_etext - (unsigned long)&_stext;
 727        initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
 728        datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
 729        bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
 730
 731        printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
 732               "%luk reserved, %luk data, %luk bss, %luk init)\n",
 733                (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
 734                num_physpages << (PAGE_SHIFT-10),
 735                codesize >> 10,
 736                reservedpages << (PAGE_SHIFT-10),
 737                datasize >> 10,
 738                bsssize >> 10,
 739                initsize >> 10);
 740
 741        mem_init_done = 1;
 742
 743#ifdef CONFIG_PPC_ISERIES
 744        iommu_vio_init();
 745#endif
 746}
 747
 748/*
 749 * This is called when a page has been modified by the kernel.
 750 * It just marks the page as not i-cache clean.  We do the i-cache
 751 * flush later when the page is given to a user process, if necessary.
 752 */
 753void flush_dcache_page(struct page *page)
 754{
 755        if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
 756                return;
 757        /* avoid an atomic op if possible */
 758        if (test_bit(PG_arch_1, &page->flags))
 759                clear_bit(PG_arch_1, &page->flags);
 760}
 761
 762void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 763{
 764        clear_page(page);
 765
 766        if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
 767                return;
 768        /*
 769         * We shouldnt have to do this, but some versions of glibc
 770         * require it (ld.so assumes zero filled pages are icache clean)
 771         * - Anton
 772         */
 773
 774        /* avoid an atomic op if possible */
 775        if (test_bit(PG_arch_1, &pg->flags))
 776                clear_bit(PG_arch_1, &pg->flags);
 777}
 778
 779void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
 780                    struct page *pg)
 781{
 782        copy_page(vto, vfrom);
 783
 784        /*
 785         * We should be able to use the following optimisation, however
 786         * there are two problems.
 787         * Firstly a bug in some versions of binutils meant PLT sections
 788         * were not marked executable.
 789         * Secondly the first word in the GOT section is blrl, used
 790         * to establish the GOT address. Until recently the GOT was
 791         * not marked executable.
 792         * - Anton
 793         */
 794#if 0
 795        if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
 796                return;
 797#endif
 798
 799        if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
 800                return;
 801
 802        /* avoid an atomic op if possible */
 803        if (test_bit(PG_arch_1, &pg->flags))
 804                clear_bit(PG_arch_1, &pg->flags);
 805}
 806
 807void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 808                             unsigned long addr, int len)
 809{
 810        unsigned long maddr;
 811
 812        maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
 813        flush_icache_range(maddr, maddr + len);
 814}
 815
 816/*
 817 * This is called at the end of handling a user page fault, when the
 818 * fault has been handled by updating a PTE in the linux page tables.
 819 * We use it to preload an HPTE into the hash table corresponding to
 820 * the updated linux PTE.
 821 * 
 822 * This must always be called with the mm->page_table_lock held
 823 */
 824void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 825                      pte_t pte)
 826{
 827        unsigned long vsid;
 828        void *pgdir;
 829        pte_t *ptep;
 830        int local = 0;
 831        cpumask_t tmp;
 832        unsigned long flags;
 833
 834        /* handle i-cache coherency */
 835        if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE) &&
 836            !(cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)) {
 837                unsigned long pfn = pte_pfn(pte);
 838                if (pfn_valid(pfn)) {
 839                        struct page *page = pfn_to_page(pfn);
 840                        if (!PageReserved(page)
 841                            && !test_bit(PG_arch_1, &page->flags)) {
 842                                __flush_dcache_icache(page_address(page));
 843                                set_bit(PG_arch_1, &page->flags);
 844                        }
 845                }
 846        }
 847
 848        /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 849        if (!pte_young(pte))
 850                return;
 851
 852        pgdir = vma->vm_mm->pgd;
 853        if (pgdir == NULL)
 854                return;
 855
 856        ptep = find_linux_pte(pgdir, ea);
 857        if (!ptep)
 858                return;
 859
 860        vsid = get_vsid(vma->vm_mm->context.id, ea);
 861
 862        local_irq_save(flags);
 863        tmp = cpumask_of_cpu(smp_processor_id());
 864        if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
 865                local = 1;
 866
 867        __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
 868                    0x300, local);
 869        local_irq_restore(flags);
 870}
 871
 872void __iomem * reserve_phb_iospace(unsigned long size)
 873{
 874        void __iomem *virt_addr;
 875                
 876        if (phbs_io_bot >= IMALLOC_BASE) 
 877                panic("reserve_phb_iospace(): phb io space overflow\n");
 878                        
 879        virt_addr = (void __iomem *) phbs_io_bot;
 880        phbs_io_bot += size;
 881
 882        return virt_addr;
 883}
 884
 885kmem_cache_t *zero_cache;
 886
 887static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
 888{
 889        memset(pte, 0, PAGE_SIZE);
 890}
 891
 892void pgtable_cache_init(void)
 893{
 894        zero_cache = kmem_cache_create("zero",
 895                                PAGE_SIZE,
 896                                0,
 897                                SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
 898                                zero_ctor,
 899                                NULL);
 900        if (!zero_cache)
 901                panic("pgtable_cache_init(): could not create zero_cache!\n");
 902}
 903
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.