linux/arch/powerpc/mm/hash_native_64.c
<<
>>
Prefs
   1/*
   2 * native hashtable management.
   3 *
   4 * SMP scalability work:
   5 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
   6 * 
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License
   9 * as published by the Free Software Foundation; either version
  10 * 2 of the License, or (at your option) any later version.
  11 */
  12
  13#undef DEBUG_LOW
  14
  15#include <linux/spinlock.h>
  16#include <linux/bitops.h>
  17#include <linux/threads.h>
  18#include <linux/smp.h>
  19
  20#include <asm/abs_addr.h>
  21#include <asm/machdep.h>
  22#include <asm/mmu.h>
  23#include <asm/mmu_context.h>
  24#include <asm/pgtable.h>
  25#include <asm/tlbflush.h>
  26#include <asm/tlb.h>
  27#include <asm/cputable.h>
  28#include <asm/udbg.h>
  29#include <asm/kexec.h>
  30
  /* DBG_LOW() forwards to udbg_printf() only when DEBUG_LOW is defined. */
#ifndef DEBUG_LOW
#define DBG_LOW(fmt...)
#else
#define DBG_LOW(fmt...) udbg_printf(fmt)
#endif

/* Bit number in hash_pte.v used as the per-HPTE lock (see native_lock_hpte) */
#define HPTE_LOCK_BIT 3

/*
 * Serialises tlbie sequences; taken by tlbie(), native_hpte_clear() and
 * native_flush_hash_range().
 */
static DEFINE_SPINLOCK(native_tlbie_lock);
  40
  41static inline void __tlbie(unsigned long va, int psize, int ssize)
  42{
  43        unsigned int penc;
  44
  45        /* clear top 16 bits, non SLS segment */
  46        va &= ~(0xffffULL << 48);
  47
  48        switch (psize) {
  49        case MMU_PAGE_4K:
  50                va &= ~0xffful;
  51                va |= ssize << 8;
  52                asm volatile("tlbie %0,0" : : "r" (va) : "memory");
  53                break;
  54        default:
  55                penc = mmu_psize_defs[psize].penc;
  56                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
  57                va |= penc << 12;
  58                va |= ssize << 8;
  59                asm volatile("tlbie %0,1" : : "r" (va) : "memory");
  60                break;
  61        }
  62}
  63
  64static inline void __tlbiel(unsigned long va, int psize, int ssize)
  65{
  66        unsigned int penc;
  67
  68        /* clear top 16 bits, non SLS segment */
  69        va &= ~(0xffffULL << 48);
  70
  71        switch (psize) {
  72        case MMU_PAGE_4K:
  73                va &= ~0xffful;
  74                va |= ssize << 8;
  75                asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
  76                             : : "r"(va) : "memory");
  77                break;
  78        default:
  79                penc = mmu_psize_defs[psize].penc;
  80                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
  81                va |= penc << 12;
  82                va |= ssize << 8;
  83                asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
  84                             : : "r"(va) : "memory");
  85                break;
  86        }
  87
  88}
  89
  90static inline void tlbie(unsigned long va, int psize, int ssize, int local)
  91{
  92        unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
  93        int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
  94
  95        if (use_local)
  96                use_local = mmu_psize_defs[psize].tlbiel;
  97        if (lock_tlbie && !use_local)
  98                spin_lock(&native_tlbie_lock);
  99        asm volatile("ptesync": : :"memory");
 100        if (use_local) {
 101                __tlbiel(va, psize, ssize);
 102                asm volatile("ptesync": : :"memory");
 103        } else {
 104                __tlbie(va, psize, ssize);
 105                asm volatile("eieio; tlbsync; ptesync": : :"memory");
 106        }
 107        if (lock_tlbie && !use_local)
 108                spin_unlock(&native_tlbie_lock);
 109}
 110
 111static inline void native_lock_hpte(struct hash_pte *hptep)
 112{
 113        unsigned long *word = &hptep->v;
 114
 115        while (1) {
 116                if (!test_and_set_bit(HPTE_LOCK_BIT, word))
 117                        break;
 118                while(test_bit(HPTE_LOCK_BIT, word))
 119                        cpu_relax();
 120        }
 121}
 122
 123static inline void native_unlock_hpte(struct hash_pte *hptep)
 124{
 125        unsigned long *word = &hptep->v;
 126
 127        asm volatile("lwsync":::"memory");
 128        clear_bit(HPTE_LOCK_BIT, word);
 129}
 130
 131static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 132                        unsigned long pa, unsigned long rflags,
 133                        unsigned long vflags, int psize, int ssize)
 134{
 135        struct hash_pte *hptep = htab_address + hpte_group;
 136        unsigned long hpte_v, hpte_r;
 137        int i;
 138
 139        if (!(vflags & HPTE_V_BOLTED)) {
 140                DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
 141                        " rflags=%lx, vflags=%lx, psize=%d)\n",
 142                        hpte_group, va, pa, rflags, vflags, psize);
 143        }
 144
 145        for (i = 0; i < HPTES_PER_GROUP; i++) {
 146                if (! (hptep->v & HPTE_V_VALID)) {
 147                        /* retry with lock held */
 148                        native_lock_hpte(hptep);
 149                        if (! (hptep->v & HPTE_V_VALID))
 150                                break;
 151                        native_unlock_hpte(hptep);
 152                }
 153
 154                hptep++;
 155        }
 156
 157        if (i == HPTES_PER_GROUP)
 158                return -1;
 159
 160        hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
 161        hpte_r = hpte_encode_r(pa, psize) | rflags;
 162
 163        if (!(vflags & HPTE_V_BOLTED)) {
 164                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
 165                        i, hpte_v, hpte_r);
 166        }
 167
 168        hptep->r = hpte_r;
 169        /* Guarantee the second dword is visible before the valid bit */
 170        eieio();
 171        /*
 172         * Now set the first dword including the valid bit
 173         * NOTE: this also unlocks the hpte
 174         */
 175        hptep->v = hpte_v;
 176
 177        __asm__ __volatile__ ("ptesync" : : : "memory");
 178
 179        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
 180}
 181
 182static long native_hpte_remove(unsigned long hpte_group)
 183{
 184        struct hash_pte *hptep;
 185        int i;
 186        int slot_offset;
 187        unsigned long hpte_v;
 188
 189        DBG_LOW("    remove(group=%lx)\n", hpte_group);
 190
 191        /* pick a random entry to start at */
 192        slot_offset = mftb() & 0x7;
 193
 194        for (i = 0; i < HPTES_PER_GROUP; i++) {
 195                hptep = htab_address + hpte_group + slot_offset;
 196                hpte_v = hptep->v;
 197
 198                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
 199                        /* retry with lock held */
 200                        native_lock_hpte(hptep);
 201                        hpte_v = hptep->v;
 202                        if ((hpte_v & HPTE_V_VALID)
 203                            && !(hpte_v & HPTE_V_BOLTED))
 204                                break;
 205                        native_unlock_hpte(hptep);
 206                }
 207
 208                slot_offset++;
 209                slot_offset &= 0x7;
 210        }
 211
 212        if (i == HPTES_PER_GROUP)
 213                return -1;
 214
 215        /* Invalidate the hpte. NOTE: this also unlocks it */
 216        hptep->v = 0;
 217
 218        return i;
 219}
 220
 221static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 222                                 unsigned long va, int psize, int ssize,
 223                                 int local)
 224{
 225        struct hash_pte *hptep = htab_address + slot;
 226        unsigned long hpte_v, want_v;
 227        int ret = 0;
 228
 229        want_v = hpte_encode_v(va, psize, ssize);
 230
 231        DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
 232                va, want_v & HPTE_V_AVPN, slot, newpp);
 233
 234        native_lock_hpte(hptep);
 235
 236        hpte_v = hptep->v;
 237
 238        /* Even if we miss, we need to invalidate the TLB */
 239        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
 240                DBG_LOW(" -> miss\n");
 241                ret = -1;
 242        } else {
 243                DBG_LOW(" -> hit\n");
 244                /* Update the HPTE */
 245                hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
 246                        (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
 247        }
 248        native_unlock_hpte(hptep);
 249
 250        /* Ensure it is out of the tlb too. */
 251        tlbie(va, psize, ssize, local);
 252
 253        return ret;
 254}
 255
 256static long native_hpte_find(unsigned long va, int psize, int ssize)
 257{
 258        struct hash_pte *hptep;
 259        unsigned long hash;
 260        unsigned long i;
 261        long slot;
 262        unsigned long want_v, hpte_v;
 263
 264        hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
 265        want_v = hpte_encode_v(va, psize, ssize);
 266
 267        /* Bolted mappings are only ever in the primary group */
 268        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 269        for (i = 0; i < HPTES_PER_GROUP; i++) {
 270                hptep = htab_address + slot;
 271                hpte_v = hptep->v;
 272
 273                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
 274                        /* HPTE matches */
 275                        return slot;
 276                ++slot;
 277        }
 278
 279        return -1;
 280}
 281
 282/*
 283 * Update the page protection bits. Intended to be used to create
 284 * guard pages for kernel data structures on pages which are bolted
 285 * in the HPT. Assumes pages being operated on will not be stolen.
 286 *
 287 * No need to lock here because we should be the only user.
 288 */
 289static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 290                                       int psize, int ssize)
 291{
 292        unsigned long vsid, va;
 293        long slot;
 294        struct hash_pte *hptep;
 295
 296        vsid = get_kernel_vsid(ea, ssize);
 297        va = hpt_va(ea, vsid, ssize);
 298
 299        slot = native_hpte_find(va, psize, ssize);
 300        if (slot == -1)
 301                panic("could not find page to bolt\n");
 302        hptep = htab_address + slot;
 303
 304        /* Update the HPTE */
 305        hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
 306                (newpp & (HPTE_R_PP | HPTE_R_N));
 307
 308        /* Ensure it is out of the tlb too. */
 309        tlbie(va, psize, ssize, 0);
 310}
 311
 312static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 313                                   int psize, int ssize, int local)
 314{
 315        struct hash_pte *hptep = htab_address + slot;
 316        unsigned long hpte_v;
 317        unsigned long want_v;
 318        unsigned long flags;
 319
 320        local_irq_save(flags);
 321
 322        DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);
 323
 324        want_v = hpte_encode_v(va, psize, ssize);
 325        native_lock_hpte(hptep);
 326        hpte_v = hptep->v;
 327
 328        /* Even if we miss, we need to invalidate the TLB */
 329        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
 330                native_unlock_hpte(hptep);
 331        else
 332                /* Invalidate the hpte. NOTE: this also unlocks it */
 333                hptep->v = 0;
 334
 335        /* Invalidate the TLB */
 336        tlbie(va, psize, ssize, local);
 337
 338        local_irq_restore(flags);
 339}
 340
 341#define LP_SHIFT        12
 342#define LP_BITS         8
 343#define LP_MASK(i)      ((0xFF >> (i)) << LP_SHIFT)
 344
 345static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 346                        int *psize, int *ssize, unsigned long *va)
 347{
 348        unsigned long hpte_r = hpte->r;
 349        unsigned long hpte_v = hpte->v;
 350        unsigned long avpn;
 351        int i, size, shift, penc;
 352
 353        if (!(hpte_v & HPTE_V_LARGE))
 354                size = MMU_PAGE_4K;
 355        else {
 356                for (i = 0; i < LP_BITS; i++) {
 357                        if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
 358                                break;
 359                }
 360                penc = LP_MASK(i+1) >> LP_SHIFT;
 361                for (size = 0; size < MMU_PAGE_COUNT; size++) {
 362
 363                        /* 4K pages are not represented by LP */
 364                        if (size == MMU_PAGE_4K)
 365                                continue;
 366
 367                        /* valid entries have a shift value */
 368                        if (!mmu_psize_defs[size].shift)
 369                                continue;
 370
 371                        if (penc == mmu_psize_defs[size].penc)
 372                                break;
 373                }
 374        }
 375
 376        /* This works for all page sizes, and for 256M and 1T segments */
 377        shift = mmu_psize_defs[size].shift;
 378        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
 379
 380        if (shift < 23) {
 381                unsigned long vpi, vsid, pteg;
 382
 383                pteg = slot / HPTES_PER_GROUP;
 384                if (hpte_v & HPTE_V_SECONDARY)
 385                        pteg = ~pteg;
 386                switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
 387                case MMU_SEGSIZE_256M:
 388                        vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
 389                        break;
 390                case MMU_SEGSIZE_1T:
 391                        vsid = avpn >> 40;
 392                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 393                        break;
 394                default:
 395                        avpn = vpi = size = 0;
 396                }
 397                avpn |= (vpi << mmu_psize_defs[size].shift);
 398        }
 399
 400        *va = avpn;
 401        *psize = size;
 402        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 403}
 404
 405/*
 406 * clear all mappings on kexec.  All cpus are in real mode (or they will
 407 * be when they isi), and we are the only one left.  We rely on our kernel
 408 * mapping being 0xC0's and the hardware ignoring those two real bits.
 409 *
 410 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 411 * athough there is the control page available...
 412 */
 413static void native_hpte_clear(void)
 414{
 415        unsigned long slot, slots, flags;
 416        struct hash_pte *hptep = htab_address;
 417        unsigned long hpte_v, va;
 418        unsigned long pteg_count;
 419        int psize, ssize;
 420
 421        pteg_count = htab_hash_mask + 1;
 422
 423        local_irq_save(flags);
 424
 425        /* we take the tlbie lock and hold it.  Some hardware will
 426         * deadlock if we try to tlbie from two processors at once.
 427         */
 428        spin_lock(&native_tlbie_lock);
 429
 430        slots = pteg_count * HPTES_PER_GROUP;
 431
 432        for (slot = 0; slot < slots; slot++, hptep++) {
 433                /*
 434                 * we could lock the pte here, but we are the only cpu
 435                 * running,  right?  and for crash dump, we probably
 436                 * don't want to wait for a maybe bad cpu.
 437                 */
 438                hpte_v = hptep->v;
 439
 440                /*
 441                 * Call __tlbie() here rather than tlbie() since we
 442                 * already hold the native_tlbie_lock.
 443                 */
 444                if (hpte_v & HPTE_V_VALID) {
 445                        hpte_decode(hptep, slot, &psize, &ssize, &va);
 446                        hptep->v = 0;
 447                        __tlbie(va, psize, ssize);
 448                }
 449        }
 450
 451        asm volatile("eieio; tlbsync; ptesync":::"memory");
 452        spin_unlock(&native_tlbie_lock);
 453        local_irq_restore(flags);
 454}
 455
 456/*
 457 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 458 * the lock all the time
 459 */
 460static void native_flush_hash_range(unsigned long number, int local)
 461{
 462        unsigned long va, hash, index, hidx, shift, slot;
 463        struct hash_pte *hptep;
 464        unsigned long hpte_v;
 465        unsigned long want_v;
 466        unsigned long flags;
 467        real_pte_t pte;
 468        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 469        unsigned long psize = batch->psize;
 470        int ssize = batch->ssize;
 471        int i;
 472
 473        local_irq_save(flags);
 474
 475        for (i = 0; i < number; i++) {
 476                va = batch->vaddr[i];
 477                pte = batch->pte[i];
 478
 479                pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
 480                        hash = hpt_hash(va, shift, ssize);
 481                        hidx = __rpte_to_hidx(pte, index);
 482                        if (hidx & _PTEIDX_SECONDARY)
 483                                hash = ~hash;
 484                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 485                        slot += hidx & _PTEIDX_GROUP_IX;
 486                        hptep = htab_address + slot;
 487                        want_v = hpte_encode_v(va, psize, ssize);
 488                        native_lock_hpte(hptep);
 489                        hpte_v = hptep->v;
 490                        if (!HPTE_V_COMPARE(hpte_v, want_v) ||
 491                            !(hpte_v & HPTE_V_VALID))
 492                                native_unlock_hpte(hptep);
 493                        else
 494                                hptep->v = 0;
 495                } pte_iterate_hashed_end();
 496        }
 497
 498        if (cpu_has_feature(CPU_FTR_TLBIEL) &&
 499            mmu_psize_defs[psize].tlbiel && local) {
 500                asm volatile("ptesync":::"memory");
 501                for (i = 0; i < number; i++) {
 502                        va = batch->vaddr[i];
 503                        pte = batch->pte[i];
 504
 505                        pte_iterate_hashed_subpages(pte, psize, va, index,
 506                                                    shift) {
 507                                __tlbiel(va, psize, ssize);
 508                        } pte_iterate_hashed_end();
 509                }
 510                asm volatile("ptesync":::"memory");
 511        } else {
 512                int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
 513
 514                if (lock_tlbie)
 515                        spin_lock(&native_tlbie_lock);
 516
 517                asm volatile("ptesync":::"memory");
 518                for (i = 0; i < number; i++) {
 519                        va = batch->vaddr[i];
 520                        pte = batch->pte[i];
 521
 522                        pte_iterate_hashed_subpages(pte, psize, va, index,
 523                                                    shift) {
 524                                __tlbie(va, psize, ssize);
 525                        } pte_iterate_hashed_end();
 526                }
 527                asm volatile("eieio; tlbsync; ptesync":::"memory");
 528
 529                if (lock_tlbie)
 530                        spin_unlock(&native_tlbie_lock);
 531        }
 532
 533        local_irq_restore(flags);
 534}
 535
 #ifdef CONFIG_PPC_PSERIES
/*
 * Disable TLB batching on nighthawk.
 *
 * Returns 0 when the device tree root's "model" property is exactly
 * "IBM,9076-N81", and 1 otherwise (including when there is no root node or
 * no "model" property).
 */
static inline int tlb_batching_enabled(void)
{
        struct device_node *root = of_find_node_by_path("/");
        const char *model;
        int is_nighthawk;

        if (!root)
                return 1;

        /* compare before dropping the node reference */
        model = of_get_property(root, "model", NULL);
        is_nighthawk = model && !strcmp(model, "IBM,9076-N81");
        of_node_put(root);

        return !is_nighthawk;
}
#else
/* Without CONFIG_PPC_PSERIES, batching is always enabled. */
static inline int tlb_batching_enabled(void)
{
        return 1;
}
#endif
 558
 559void __init hpte_init_native(void)
 560{
 561        ppc_md.hpte_invalidate  = native_hpte_invalidate;
 562        ppc_md.hpte_updatepp    = native_hpte_updatepp;
 563        ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
 564        ppc_md.hpte_insert      = native_hpte_insert;
 565        ppc_md.hpte_remove      = native_hpte_remove;
 566        ppc_md.hpte_clear_all   = native_hpte_clear;
 567        if (tlb_batching_enabled())
 568                ppc_md.flush_hash_range = native_flush_hash_range;
 569}
 570
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.