linux/arch/arm/mm/mmu.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/arm/mm/mmu.c
   3 *
   4 *  Copyright (C) 1995-2005 Russell King
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10#include <linux/module.h>
  11#include <linux/kernel.h>
  12#include <linux/errno.h>
  13#include <linux/init.h>
  14#include <linux/mman.h>
  15#include <linux/nodemask.h>
  16#include <linux/memblock.h>
  17#include <linux/fs.h>
  18
  19#include <asm/cputype.h>
  20#include <asm/sections.h>
  21#include <asm/cachetype.h>
  22#include <asm/setup.h>
  23#include <asm/sizes.h>
  24#include <asm/smp_plat.h>
  25#include <asm/tlb.h>
  26#include <asm/highmem.h>
  27#include <asm/traps.h>
  28
  29#include <asm/mach/arch.h>
  30#include <asm/mach/map.h>
  31
  32#include "mm.h"
  33
  34/*
  35 * empty_zero_page is a special page that is used for
  36 * zero-initialized data and COW.
  37 */
  38struct page *empty_zero_page;
  39EXPORT_SYMBOL(empty_zero_page);
  40
  41/*
  42 * The pmd table for the upper-most set of pages.
  43 */
  44pmd_t *top_pmd;
  45
  46#define CPOLICY_UNCACHED        0
  47#define CPOLICY_BUFFERED        1
  48#define CPOLICY_WRITETHROUGH    2
  49#define CPOLICY_WRITEBACK       3
  50#define CPOLICY_WRITEALLOC      4
  51
  52static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
  53static unsigned int ecc_mask __initdata = 0;
  54pgprot_t pgprot_user;
  55pgprot_t pgprot_kernel;
  56
  57EXPORT_SYMBOL(pgprot_user);
  58EXPORT_SYMBOL(pgprot_kernel);
  59
  60struct cachepolicy {
  61        const char      policy[16];
  62        unsigned int    cr_mask;
  63        unsigned int    pmd;
  64        pteval_t        pte;
  65};
  66
  67static struct cachepolicy cache_policies[] __initdata = {
  68        {
  69                .policy         = "uncached",
  70                .cr_mask        = CR_W|CR_C,
  71                .pmd            = PMD_SECT_UNCACHED,
  72                .pte            = L_PTE_MT_UNCACHED,
  73        }, {
  74                .policy         = "buffered",
  75                .cr_mask        = CR_C,
  76                .pmd            = PMD_SECT_BUFFERED,
  77                .pte            = L_PTE_MT_BUFFERABLE,
  78        }, {
  79                .policy         = "writethrough",
  80                .cr_mask        = 0,
  81                .pmd            = PMD_SECT_WT,
  82                .pte            = L_PTE_MT_WRITETHROUGH,
  83        }, {
  84                .policy         = "writeback",
  85                .cr_mask        = 0,
  86                .pmd            = PMD_SECT_WB,
  87                .pte            = L_PTE_MT_WRITEBACK,
  88        }, {
  89                .policy         = "writealloc",
  90                .cr_mask        = 0,
  91                .pmd            = PMD_SECT_WBWA,
  92                .pte            = L_PTE_MT_WRITEALLOC,
  93        }
  94};
  95
  96/*
  97 * These are useful for identifying cache coherency
  98 * problems by allowing the cache or the cache and
  99 * writebuffer to be turned off.  (Note: the write
 100 * buffer should not be on and the cache off).
 101 */
 102static int __init early_cachepolicy(char *p)
 103{
 104        int i;
 105
 106        for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
 107                int len = strlen(cache_policies[i].policy);
 108
 109                if (memcmp(p, cache_policies[i].policy, len) == 0) {
 110                        cachepolicy = i;
 111                        cr_alignment &= ~cache_policies[i].cr_mask;
 112                        cr_no_alignment &= ~cache_policies[i].cr_mask;
 113                        break;
 114                }
 115        }
 116        if (i == ARRAY_SIZE(cache_policies))
 117                printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
 118        /*
 119         * This restriction is partly to do with the way we boot; it is
 120         * unpredictable to have memory mapped using two different sets of
 121         * memory attributes (shared, type, and cache attribs).  We can not
 122         * change these attributes once the initial assembly has setup the
 123         * page tables.
 124         */
 125        if (cpu_architecture() >= CPU_ARCH_ARMv6) {
 126                printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
 127                cachepolicy = CPOLICY_WRITEBACK;
 128        }
 129        flush_cache_all();
 130        set_cr(cr_alignment);
 131        return 0;
 132}
 133early_param("cachepolicy", early_cachepolicy);
 134
 135static int __init early_nocache(char *__unused)
 136{
 137        char *p = "buffered";
 138        printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
 139        early_cachepolicy(p);
 140        return 0;
 141}
 142early_param("nocache", early_nocache);
 143
 144static int __init early_nowrite(char *__unused)
 145{
 146        char *p = "uncached";
 147        printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
 148        early_cachepolicy(p);
 149        return 0;
 150}
 151early_param("nowb", early_nowrite);
 152
 153static int __init early_ecc(char *p)
 154{
 155        if (memcmp(p, "on", 2) == 0)
 156                ecc_mask = PMD_PROTECTION;
 157        else if (memcmp(p, "off", 3) == 0)
 158                ecc_mask = 0;
 159        return 0;
 160}
 161early_param("ecc", early_ecc);
 162
 163static int __init noalign_setup(char *__unused)
 164{
 165        cr_alignment &= ~CR_A;
 166        cr_no_alignment &= ~CR_A;
 167        set_cr(cr_alignment);
 168        return 1;
 169}
 170__setup("noalign", noalign_setup);
 171
 172#ifndef CONFIG_SMP
 173void adjust_cr(unsigned long mask, unsigned long set)
 174{
 175        unsigned long flags;
 176
 177        mask &= ~CR_A;
 178
 179        set &= mask;
 180
 181        local_irq_save(flags);
 182
 183        cr_no_alignment = (cr_no_alignment & ~mask) | set;
 184        cr_alignment = (cr_alignment & ~mask) | set;
 185
 186        set_cr((get_cr() & ~mask) | set);
 187
 188        local_irq_restore(flags);
 189}
 190#endif
 191
 192#define PROT_PTE_DEVICE         L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 193#define PROT_SECT_DEVICE        PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 194
 195static struct mem_type mem_types[] = {
 196        [MT_DEVICE] = {           /* Strongly ordered / ARMv6 shared device */
 197                .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 198                                  L_PTE_SHARED,
 199                .prot_l1        = PMD_TYPE_TABLE,
 200                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_S,
 201                .domain         = DOMAIN_IO,
 202        },
 203        [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
 204                .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
 205                .prot_l1        = PMD_TYPE_TABLE,
 206                .prot_sect      = PROT_SECT_DEVICE,
 207                .domain         = DOMAIN_IO,
 208        },
 209        [MT_DEVICE_CACHED] = {    /* ioremap_cached */
 210                .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
 211                .prot_l1        = PMD_TYPE_TABLE,
 212                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_WB,
 213                .domain         = DOMAIN_IO,
 214        },      
 215        [MT_DEVICE_WC] = {      /* ioremap_wc */
 216                .prot_pte       = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
 217                .prot_l1        = PMD_TYPE_TABLE,
 218                .prot_sect      = PROT_SECT_DEVICE,
 219                .domain         = DOMAIN_IO,
 220        },
 221        [MT_UNCACHED] = {
 222                .prot_pte       = PROT_PTE_DEVICE,
 223                .prot_l1        = PMD_TYPE_TABLE,
 224                .prot_sect      = PMD_TYPE_SECT | PMD_SECT_XN,
 225                .domain         = DOMAIN_IO,
 226        },
 227        [MT_CACHECLEAN] = {
 228                .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
 229                .domain    = DOMAIN_KERNEL,
 230        },
 231        [MT_MINICLEAN] = {
 232                .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
 233                .domain    = DOMAIN_KERNEL,
 234        },
 235        [MT_LOW_VECTORS] = {
 236                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 237                                L_PTE_RDONLY,
 238                .prot_l1   = PMD_TYPE_TABLE,
 239                .domain    = DOMAIN_USER,
 240        },
 241        [MT_HIGH_VECTORS] = {
 242                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 243                                L_PTE_USER | L_PTE_RDONLY,
 244                .prot_l1   = PMD_TYPE_TABLE,
 245                .domain    = DOMAIN_USER,
 246        },
 247        [MT_MEMORY] = {
 248                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
 249                .prot_l1   = PMD_TYPE_TABLE,
 250                .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 251                .domain    = DOMAIN_KERNEL,
 252        },
 253        [MT_ROM] = {
 254                .prot_sect = PMD_TYPE_SECT,
 255                .domain    = DOMAIN_KERNEL,
 256        },
 257        [MT_MEMORY_NONCACHED] = {
 258                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 259                                L_PTE_MT_BUFFERABLE,
 260                .prot_l1   = PMD_TYPE_TABLE,
 261                .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 262                .domain    = DOMAIN_KERNEL,
 263        },
 264        [MT_MEMORY_DTCM] = {
 265                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 266                                L_PTE_XN,
 267                .prot_l1   = PMD_TYPE_TABLE,
 268                .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
 269                .domain    = DOMAIN_KERNEL,
 270        },
 271        [MT_MEMORY_ITCM] = {
 272                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
 273                .prot_l1   = PMD_TYPE_TABLE,
 274                .domain    = DOMAIN_KERNEL,
 275        },
 276};
 277
 278const struct mem_type *get_mem_type(unsigned int type)
 279{
 280        return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
 281}
 282EXPORT_SYMBOL(get_mem_type);
 283
 284/*
 285 * Adjust the PMD section entries according to the CPU in use.
 286 */
 287static void __init build_mem_type_table(void)
 288{
 289        struct cachepolicy *cp;
 290        unsigned int cr = get_cr();
 291        unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
 292        int cpu_arch = cpu_architecture();
 293        int i;
 294
 295        if (cpu_arch < CPU_ARCH_ARMv6) {
 296#if defined(CONFIG_CPU_DCACHE_DISABLE)
 297                if (cachepolicy > CPOLICY_BUFFERED)
 298                        cachepolicy = CPOLICY_BUFFERED;
 299#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
 300                if (cachepolicy > CPOLICY_WRITETHROUGH)
 301                        cachepolicy = CPOLICY_WRITETHROUGH;
 302#endif
 303        }
 304        if (cpu_arch < CPU_ARCH_ARMv5) {
 305                if (cachepolicy >= CPOLICY_WRITEALLOC)
 306                        cachepolicy = CPOLICY_WRITEBACK;
 307                ecc_mask = 0;
 308        }
 309        if (is_smp())
 310                cachepolicy = CPOLICY_WRITEALLOC;
 311
 312        /*
 313         * Strip out features not present on earlier architectures.
 314         * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
 315         * without extended page tables don't have the 'Shared' bit.
 316         */
 317        if (cpu_arch < CPU_ARCH_ARMv5)
 318                for (i = 0; i < ARRAY_SIZE(mem_types); i++)
 319                        mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
 320        if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
 321                for (i = 0; i < ARRAY_SIZE(mem_types); i++)
 322                        mem_types[i].prot_sect &= ~PMD_SECT_S;
 323
 324        /*
 325         * ARMv5 and lower, bit 4 must be set for page tables (was: cache
 326         * "update-able on write" bit on ARM610).  However, Xscale and
 327         * Xscale3 require this bit to be cleared.
 328         */
 329        if (cpu_is_xscale() || cpu_is_xsc3()) {
 330                for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
 331                        mem_types[i].prot_sect &= ~PMD_BIT4;
 332                        mem_types[i].prot_l1 &= ~PMD_BIT4;
 333                }
 334        } else if (cpu_arch < CPU_ARCH_ARMv6) {
 335                for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
 336                        if (mem_types[i].prot_l1)
 337                                mem_types[i].prot_l1 |= PMD_BIT4;
 338                        if (mem_types[i].prot_sect)
 339                                mem_types[i].prot_sect |= PMD_BIT4;
 340                }
 341        }
 342
 343        /*
 344         * Mark the device areas according to the CPU/architecture.
 345         */
 346        if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
 347                if (!cpu_is_xsc3()) {
 348                        /*
 349                         * Mark device regions on ARMv6+ as execute-never
 350                         * to prevent speculative instruction fetches.
 351                         */
 352                        mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
 353                        mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
 354                        mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
 355                        mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
 356                }
 357                if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
 358                        /*
 359                         * For ARMv7 with TEX remapping,
 360                         * - shared device is SXCB=1100
 361                         * - nonshared device is SXCB=0100
 362                         * - write combine device mem is SXCB=0001
 363                         * (Uncached Normal memory)
 364                         */
 365                        mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
 366                        mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
 367                        mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
 368                } else if (cpu_is_xsc3()) {
 369                        /*
 370                         * For Xscale3,
 371                         * - shared device is TEXCB=00101
 372                         * - nonshared device is TEXCB=01000
 373                         * - write combine device mem is TEXCB=00100
 374                         * (Inner/Outer Uncacheable in xsc3 parlance)
 375                         */
 376                        mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
 377                        mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
 378                        mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
 379                } else {
 380                        /*
 381                         * For ARMv6 and ARMv7 without TEX remapping,
 382                         * - shared device is TEXCB=00001
 383                         * - nonshared device is TEXCB=01000
 384                         * - write combine device mem is TEXCB=00100
 385                         * (Uncached Normal in ARMv6 parlance).
 386                         */
 387                        mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
 388                        mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
 389                        mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
 390                }
 391        } else {
 392                /*
 393                 * On others, write combining is "Uncached/Buffered"
 394                 */
 395                mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
 396        }
 397
 398        /*
 399         * Now deal with the memory-type mappings
 400         */
 401        cp = &cache_policies[cachepolicy];
 402        vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
 403
 404        /*
 405         * Only use write-through for non-SMP systems
 406         */
 407        if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
 408                vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
 409
 410        /*
 411         * Enable CPU-specific coherency if supported.
 412         * (Only available on XSC3 at the moment.)
 413         */
 414        if (arch_is_coherent() && cpu_is_xsc3()) {
 415                mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 416                mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
 417                mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 418                mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
 419        }
 420        /*
 421         * ARMv6 and above have extended page tables.
 422         */
 423        if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
 424                /*
 425                 * Mark cache clean areas and XIP ROM read only
 426                 * from SVC mode and no access from userspace.
 427                 */
 428                mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 429                mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 430                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 431
 432                if (is_smp()) {
 433                        /*
 434                         * Mark memory with the "shared" attribute
 435                         * for SMP systems
 436                         */
 437                        user_pgprot |= L_PTE_SHARED;
 438                        kern_pgprot |= L_PTE_SHARED;
 439                        vecs_pgprot |= L_PTE_SHARED;
 440                        mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
 441                        mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
 442                        mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
 443                        mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
 444                        mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 445                        mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
 446                        mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 447                        mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
 448                }
 449        }
 450
 451        /*
 452         * Non-cacheable Normal - intended for memory areas that must
 453         * not cause dirty cache line writebacks when used
 454         */
 455        if (cpu_arch >= CPU_ARCH_ARMv6) {
 456                if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
 457                        /* Non-cacheable Normal is XCB = 001 */
 458                        mem_types[MT_MEMORY_NONCACHED].prot_sect |=
 459                                PMD_SECT_BUFFERED;
 460                } else {
 461                        /* For both ARMv6 and non-TEX-remapping ARMv7 */
 462                        mem_types[MT_MEMORY_NONCACHED].prot_sect |=
 463                                PMD_SECT_TEX(1);
 464                }
 465        } else {
 466                mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
 467        }
 468
 469        for (i = 0; i < 16; i++) {
 470                unsigned long v = pgprot_val(protection_map[i]);
 471                protection_map[i] = __pgprot(v | user_pgprot);
 472        }
 473
 474        mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
 475        mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
 476
 477        pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 478        pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
 479                                 L_PTE_DIRTY | kern_pgprot);
 480
 481        mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 482        mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
 483        mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
 484        mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
 485        mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
 486        mem_types[MT_ROM].prot_sect |= cp->pmd;
 487
 488        switch (cp->pmd) {
 489        case PMD_SECT_WT:
 490                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
 491                break;
 492        case PMD_SECT_WB:
 493        case PMD_SECT_WBWA:
 494                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
 495                break;
 496        }
 497        printk("Memory policy: ECC %sabled, Data cache %s\n",
 498                ecc_mask ? "en" : "dis", cp->policy);
 499
 500        for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
 501                struct mem_type *t = &mem_types[i];
 502                if (t->prot_l1)
 503                        t->prot_l1 |= PMD_DOMAIN(t->domain);
 504                if (t->prot_sect)
 505                        t->prot_sect |= PMD_DOMAIN(t->domain);
 506        }
 507}
 508
 509#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 510pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 511                              unsigned long size, pgprot_t vma_prot)
 512{
 513        if (!pfn_valid(pfn))
 514                return pgprot_noncached(vma_prot);
 515        else if (file->f_flags & O_SYNC)
 516                return pgprot_writecombine(vma_prot);
 517        return vma_prot;
 518}
 519EXPORT_SYMBOL(phys_mem_access_prot);
 520#endif
 521
 522#define vectors_base()  (vectors_high() ? 0xffff0000 : 0)
 523
 524static void __init *early_alloc(unsigned long sz)
 525{
 526        void *ptr = __va(memblock_alloc(sz, sz));
 527        memset(ptr, 0, sz);
 528        return ptr;
 529}
 530
 531static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
 532{
 533        if (pmd_none(*pmd)) {
 534                pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
 535                __pmd_populate(pmd, __pa(pte), prot);
 536        }
 537        BUG_ON(pmd_bad(*pmd));
 538        return pte_offset_kernel(pmd, addr);
 539}
 540
 541static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 542                                  unsigned long end, unsigned long pfn,
 543                                  const struct mem_type *type)
 544{
 545        pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
 546        do {
 547                set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
 548                pfn++;
 549        } while (pte++, addr += PAGE_SIZE, addr != end);
 550}
 551
 552static void __init alloc_init_section(pud_t *pud, unsigned long addr,
 553                                      unsigned long end, phys_addr_t phys,
 554                                      const struct mem_type *type)
 555{
 556        pmd_t *pmd = pmd_offset(pud, addr);
 557
 558        /*
 559         * Try a section mapping - end, addr and phys must all be aligned
 560         * to a section boundary.  Note that PMDs refer to the individual
 561         * L1 entries, whereas PGDs refer to a group of L1 entries making
 562         * up one logical pointer to an L2 table.
 563         */
 564        if (((addr | end | phys) & ~SECTION_MASK) == 0) {
 565                pmd_t *p = pmd;
 566
 567                if (addr & SECTION_SIZE)
 568                        pmd++;
 569
 570                do {
 571                        *pmd = __pmd(phys | type->prot_sect);
 572                        phys += SECTION_SIZE;
 573                } while (pmd++, addr += SECTION_SIZE, addr != end);
 574
 575                flush_pmd_entry(p);
 576        } else {
 577                /*
 578                 * No need to loop; pte's aren't interested in the
 579                 * individual L1 entries.
 580                 */
 581                alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
 582        }
 583}
 584
 585static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 586        unsigned long phys, const struct mem_type *type)
 587{
 588        pud_t *pud = pud_offset(pgd, addr);
 589        unsigned long next;
 590
 591        do {
 592                next = pud_addr_end(addr, end);
 593                alloc_init_section(pud, addr, next, phys, type);
 594                phys += next - addr;
 595        } while (pud++, addr = next, addr != end);
 596}
 597
 598static void __init create_36bit_mapping(struct map_desc *md,
 599                                        const struct mem_type *type)
 600{
 601        unsigned long addr, length, end;
 602        phys_addr_t phys;
 603        pgd_t *pgd;
 604
 605        addr = md->virtual;
 606        phys = __pfn_to_phys(md->pfn);
 607        length = PAGE_ALIGN(md->length);
 608
 609        if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
 610                printk(KERN_ERR "MM: CPU does not support supersection "
 611                       "mapping for 0x%08llx at 0x%08lx\n",
 612                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 613                return;
 614        }
 615
 616        /* N.B. ARMv6 supersections are only defined to work with domain 0.
 617         *      Since domain assignments can in fact be arbitrary, the
 618         *      'domain == 0' check below is required to insure that ARMv6
 619         *      supersections are only allocated for domain 0 regardless
 620         *      of the actual domain assignments in use.
 621         */
 622        if (type->domain) {
 623                printk(KERN_ERR "MM: invalid domain in supersection "
 624                       "mapping for 0x%08llx at 0x%08lx\n",
 625                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 626                return;
 627        }
 628
 629        if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
 630                printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
 631                       " at 0x%08lx invalid alignment\n",
 632                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 633                return;
 634        }
 635
 636        /*
 637         * Shift bits [35:32] of address into bits [23:20] of PMD
 638         * (See ARMv6 spec).
 639         */
 640        phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
 641
 642        pgd = pgd_offset_k(addr);
 643        end = addr + length;
 644        do {
 645                pud_t *pud = pud_offset(pgd, addr);
 646                pmd_t *pmd = pmd_offset(pud, addr);
 647                int i;
 648
 649                for (i = 0; i < 16; i++)
 650                        *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
 651
 652                addr += SUPERSECTION_SIZE;
 653                phys += SUPERSECTION_SIZE;
 654                pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
 655        } while (addr != end);
 656}
 657
 658/*
 659 * Create the page directory entries and any necessary
 660 * page tables for the mapping specified by `md'.  We
 661 * are able to cope here with varying sizes and address
 662 * offsets, and we take full advantage of sections and
 663 * supersections.
 664 */
 665static void __init create_mapping(struct map_desc *md)
 666{
 667        unsigned long addr, length, end;
 668        phys_addr_t phys;
 669        const struct mem_type *type;
 670        pgd_t *pgd;
 671
 672        if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
 673                printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
 674                       " at 0x%08lx in user region\n",
 675                       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 676                return;
 677        }
 678
 679        if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
 680            md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
 681                printk(KERN_WARNING "BUG: mapping for 0x%08llx"
 682                       " at 0x%08lx overlaps vmalloc space\n",
 683                       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 684        }
 685
 686        type = &mem_types[md->type];
 687
 688        /*
 689         * Catch 36-bit addresses
 690         */
 691        if (md->pfn >= 0x100000) {
 692                create_36bit_mapping(md, type);
 693                return;
 694        }
 695
 696        addr = md->virtual & PAGE_MASK;
 697        phys = __pfn_to_phys(md->pfn);
 698        length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 699
 700        if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
 701                printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
 702                       "be mapped using pages, ignoring.\n",
 703                       (long long)__pfn_to_phys(md->pfn), addr);
 704                return;
 705        }
 706
 707        pgd = pgd_offset_k(addr);
 708        end = addr + length;
 709        do {
 710                unsigned long next = pgd_addr_end(addr, end);
 711
 712                alloc_init_pud(pgd, addr, next, phys, type);
 713
 714                phys += next - addr;
 715                addr = next;
 716        } while (pgd++, addr != end);
 717}
 718
 719/*
 720 * Create the architecture specific mappings
 721 */
 722void __init iotable_init(struct map_desc *io_desc, int nr)
 723{
 724        int i;
 725
 726        for (i = 0; i < nr; i++)
 727                create_mapping(io_desc + i);
 728}
 729
 730static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
 731
 732/*
 733 * vmalloc=size forces the vmalloc area to be exactly 'size'
 734 * bytes. This can be used to increase (or decrease) the vmalloc
 735 * area - the default is 128m.
 736 */
 737static int __init early_vmalloc(char *arg)
 738{
 739        unsigned long vmalloc_reserve = memparse(arg, NULL);
 740
 741        if (vmalloc_reserve < SZ_16M) {
 742                vmalloc_reserve = SZ_16M;
 743                printk(KERN_WARNING
 744                        "vmalloc area too small, limiting to %luMB\n",
 745                        vmalloc_reserve >> 20);
 746        }
 747
 748        if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
 749                vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
 750                printk(KERN_WARNING
 751                        "vmalloc area is too big, limiting to %luMB\n",
 752                        vmalloc_reserve >> 20);
 753        }
 754
 755        vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
 756        return 0;
 757}
 758early_param("vmalloc", early_vmalloc);
 759
 760static phys_addr_t lowmem_limit __initdata = 0;
 761
 762void __init sanity_check_meminfo(void)
 763{
 764        int i, j, highmem = 0;
 765
 766        for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
 767                struct membank *bank = &meminfo.bank[j];
 768                *bank = meminfo.bank[i];
 769
 770#ifdef CONFIG_HIGHMEM
 771                if (__va(bank->start) >= vmalloc_min ||
 772                    __va(bank->start) < (void *)PAGE_OFFSET)
 773                        highmem = 1;
 774
 775                bank->highmem = highmem;
 776
 777                /*
 778                 * Split those memory banks which are partially overlapping
 779                 * the vmalloc area greatly simplifying things later.
 780                 */
 781                if (__va(bank->start) < vmalloc_min &&
 782                    bank->size > vmalloc_min - __va(bank->start)) {
 783                        if (meminfo.nr_banks >= NR_BANKS) {
 784                                printk(KERN_CRIT "NR_BANKS too low, "
 785                                                 "ignoring high memory\n");
 786                        } else {
 787                                memmove(bank + 1, bank,
 788                                        (meminfo.nr_banks - i) * sizeof(*bank));
 789                                meminfo.nr_banks++;
 790                                i++;
 791                                bank[1].size -= vmalloc_min - __va(bank->start);
 792                                bank[1].start = __pa(vmalloc_min - 1) + 1;
 793                                bank[1].highmem = highmem = 1;
 794                                j++;
 795                        }
 796                        bank->size = vmalloc_min - __va(bank->start);
 797                }
 798#else
 799                bank->highmem = highmem;
 800
 801                /*
 802                 * Check whether this memory bank would entirely overlap
 803                 * the vmalloc area.
 804                 */
 805                if (__va(bank->start) >= vmalloc_min ||
 806                    __va(bank->start) < (void *)PAGE_OFFSET) {
 807                        printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
 808                               "(vmalloc region overlap).\n",
 809                               (unsigned long long)bank->start,
 810                               (unsigned long long)bank->start + bank->size - 1);
 811                        continue;
 812                }
 813
 814                /*
 815                 * Check whether this memory bank would partially overlap
 816                 * the vmalloc area.
 817                 */
 818                if (__va(bank->start + bank->size) > vmalloc_min ||
 819                    __va(bank->start + bank->size) < __va(bank->start)) {
 820                        unsigned long newsize = vmalloc_min - __va(bank->start);
 821                        printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
 822                               "to -%.8llx (vmalloc region overlap).\n",
 823                               (unsigned long long)bank->start,
 824                               (unsigned long long)bank->start + bank->size - 1,
 825                               (unsigned long long)bank->start + newsize - 1);
 826                        bank->size = newsize;
 827                }
 828#endif
 829                if (!bank->highmem && bank->start + bank->size > lowmem_limit)
 830                        lowmem_limit = bank->start + bank->size;
 831
 832                j++;
 833        }
 834#ifdef CONFIG_HIGHMEM
 835        if (highmem) {
 836                const char *reason = NULL;
 837
 838                if (cache_is_vipt_aliasing()) {
 839                        /*
 840                         * Interactions between kmap and other mappings
 841                         * make highmem support with aliasing VIPT caches
 842                         * rather difficult.
 843                         */
 844                        reason = "with VIPT aliasing cache";
 845                }
 846                if (reason) {
 847                        printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
 848                                reason);
 849                        while (j > 0 && meminfo.bank[j - 1].highmem)
 850                                j--;
 851                }
 852        }
 853#endif
 854        meminfo.nr_banks = j;
 855        memblock_set_current_limit(lowmem_limit);
 856}
 857
 858static inline void prepare_page_table(void)
 859{
 860        unsigned long addr;
 861        phys_addr_t end;
 862
 863        /*
 864         * Clear out all the mappings below the kernel image.
 865         */
 866        for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
 867                pmd_clear(pmd_off_k(addr));
 868
 869#ifdef CONFIG_XIP_KERNEL
 870        /* The XIP kernel is mapped in the module area -- skip over it */
 871        addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
 872#endif
 873        for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
 874                pmd_clear(pmd_off_k(addr));
 875
 876        /*
 877         * Find the end of the first block of lowmem.
 878         */
 879        end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
 880        if (end >= lowmem_limit)
 881                end = lowmem_limit;
 882
 883        /*
 884         * Clear out all the kernel space mappings, except for the first
 885         * memory bank, up to the end of the vmalloc region.
 886         */
 887        for (addr = __phys_to_virt(end);
 888             addr < VMALLOC_END; addr += PGDIR_SIZE)
 889                pmd_clear(pmd_off_k(addr));
 890}
 891
 892/*
 893 * Reserve the special regions of memory
 894 */
 895void __init arm_mm_memblock_reserve(void)
 896{
 897        /*
 898         * Reserve the page tables.  These are already in use,
 899         * and can only be in node 0.
 900         */
 901        memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t));
 902
 903#ifdef CONFIG_SA1111
 904        /*
 905         * Because of the SA1111 DMA bug, we want to preserve our
 906         * precious DMA-able memory...
 907         */
 908        memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
 909#endif
 910}
 911
 912/*
 913 * Set up device the mappings.  Since we clear out the page tables for all
 914 * mappings above VMALLOC_END, we will remove any debug device mappings.
 915 * This means you have to be careful how you debug this function, or any
 916 * called function.  This means you can't use any function or debugging
 917 * method which may touch any device, otherwise the kernel _will_ crash.
 918 */
 919static void __init devicemaps_init(struct machine_desc *mdesc)
 920{
 921        struct map_desc map;
 922        unsigned long addr;
 923
 924        /*
 925         * Allocate the vector page early.
 926         */
 927        vectors_page = early_alloc(PAGE_SIZE);
 928
 929        for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
 930                pmd_clear(pmd_off_k(addr));
 931
 932        /*
 933         * Map the kernel if it is XIP.
 934         * It is always first in the modulearea.
 935         */
 936#ifdef CONFIG_XIP_KERNEL
 937        map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
 938        map.virtual = MODULES_VADDR;
 939        map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
 940        map.type = MT_ROM;
 941        create_mapping(&map);
 942#endif
 943
 944        /*
 945         * Map the cache flushing regions.
 946         */
 947#ifdef FLUSH_BASE
 948        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
 949        map.virtual = FLUSH_BASE;
 950        map.length = SZ_1M;
 951        map.type = MT_CACHECLEAN;
 952        create_mapping(&map);
 953#endif
 954#ifdef FLUSH_BASE_MINICACHE
 955        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
 956        map.virtual = FLUSH_BASE_MINICACHE;
 957        map.length = SZ_1M;
 958        map.type = MT_MINICLEAN;
 959        create_mapping(&map);
 960#endif
 961
 962        /*
 963         * Create a mapping for the machine vectors at the high-vectors
 964         * location (0xffff0000).  If we aren't using high-vectors, also
 965         * create a mapping at the low-vectors virtual address.
 966         */
 967        map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
 968        map.virtual = 0xffff0000;
 969        map.length = PAGE_SIZE;
 970        map.type = MT_HIGH_VECTORS;
 971        create_mapping(&map);
 972
 973        if (!vectors_high()) {
 974                map.virtual = 0;
 975                map.type = MT_LOW_VECTORS;
 976                create_mapping(&map);
 977        }
 978
 979        /*
 980         * Ask the machine support to map in the statically mapped devices.
 981         */
 982        if (mdesc->map_io)
 983                mdesc->map_io();
 984
 985        /*
 986         * Finally flush the caches and tlb to ensure that we're in a
 987         * consistent state wrt the writebuffer.  This also ensures that
 988         * any write-allocated cache lines in the vector page are written
 989         * back.  After this point, we can start to touch devices again.
 990         */
 991        local_flush_tlb_all();
 992        flush_cache_all();
 993}
 994
 995static void __init kmap_init(void)
 996{
 997#ifdef CONFIG_HIGHMEM
 998        pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
 999                PKMAP_BASE, _PAGE_KERNEL_TABLE);
1000#endif
1001}
1002
1003static void __init map_lowmem(void)
1004{
1005        struct memblock_region *reg;
1006
1007        /* Map all the lowmem memory banks. */
1008        for_each_memblock(memory, reg) {
1009                phys_addr_t start = reg->base;
1010                phys_addr_t end = start + reg->size;
1011                struct map_desc map;
1012
1013                if (end > lowmem_limit)
1014                        end = lowmem_limit;
1015                if (start >= end)
1016                        break;
1017
1018                map.pfn = __phys_to_pfn(start);
1019                map.virtual = __phys_to_virt(start);
1020                map.length = end - start;
1021                map.type = MT_MEMORY;
1022
1023                create_mapping(&map);
1024        }
1025}
1026
1027/*
1028 * paging_init() sets up the page tables, initialises the zone memory
1029 * maps, and sets up the zero page, bad page and bad page tables.
1030 */
1031void __init paging_init(struct machine_desc *mdesc)
1032{
1033        void *zero_page;
1034
1035        memblock_set_current_limit(lowmem_limit);
1036
1037        build_mem_type_table();
1038        prepare_page_table();
1039        map_lowmem();
1040        devicemaps_init(mdesc);
1041        kmap_init();
1042
1043        top_pmd = pmd_off_k(0xffff0000);
1044
1045        /* allocate the zero page. */
1046        zero_page = early_alloc(PAGE_SIZE);
1047
1048        bootmem_init();
1049
1050        empty_zero_page = virt_to_page(zero_page);
1051        __flush_dcache_page(NULL, empty_zero_page);
1052}
1053