linux/arch/i386/mm/hugetlbpage.c
<<
>>
Prefs
   1/*
   2 * IA-32 Huge TLB Page Support for Kernel.
   3 *
   4 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
   5 */
   6
   7#include <linux/init.h>
   8#include <linux/fs.h>
   9#include <linux/mm.h>
  10#include <linux/hugetlb.h>
  11#include <linux/pagemap.h>
  12#include <linux/slab.h>
  13#include <linux/err.h>
  14#include <linux/sysctl.h>
  15#include <asm/mman.h>
  16#include <asm/tlb.h>
  17#include <asm/tlbflush.h>
  18
  19static unsigned long page_table_shareable(struct vm_area_struct *svma,
  20                                struct vm_area_struct *vma,
  21                                unsigned long addr, pgoff_t idx)
  22{
  23        unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
  24                                svma->vm_start;
  25        unsigned long sbase = saddr & PUD_MASK;
  26        unsigned long s_end = sbase + PUD_SIZE;
  27
  28        /*
  29         * match the virtual addresses, permission and the alignment of the
  30         * page table page.
  31         */
  32        if (pmd_index(addr) != pmd_index(saddr) ||
  33            vma->vm_flags != svma->vm_flags ||
  34            sbase < svma->vm_start || svma->vm_end < s_end)
  35                return 0;
  36
  37        return saddr;
  38}
  39
  40static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
  41{
  42        unsigned long base = addr & PUD_MASK;
  43        unsigned long end = base + PUD_SIZE;
  44
  45        /*
  46         * check on proper vm_flags and page table alignment
  47         */
  48        if (vma->vm_flags & VM_MAYSHARE &&
  49            vma->vm_start <= base && end <= vma->vm_end)
  50                return 1;
  51        return 0;
  52}
  53
  54/*
  55 * search for a shareable pmd page for hugetlb.
  56 */
  57static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
  58{
  59        struct vm_area_struct *vma = find_vma(mm, addr);
  60        struct address_space *mapping = vma->vm_file->f_mapping;
  61        pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
  62                        vma->vm_pgoff;
  63        struct prio_tree_iter iter;
  64        struct vm_area_struct *svma;
  65        unsigned long saddr;
  66        pte_t *spte = NULL;
  67
  68        if (!vma_shareable(vma, addr))
  69                return;
  70
  71        spin_lock(&mapping->i_mmap_lock);
  72        vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
  73                if (svma == vma)
  74                        continue;
  75
  76                saddr = page_table_shareable(svma, vma, addr, idx);
  77                if (saddr) {
  78                        spte = huge_pte_offset(svma->vm_mm, saddr);
  79                        if (spte) {
  80                                get_page(virt_to_page(spte));
  81                                break;
  82                        }
  83                }
  84        }
  85
  86        if (!spte)
  87                goto out;
  88
  89        spin_lock(&mm->page_table_lock);
  90        if (pud_none(*pud))
  91                pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK);
  92        else
  93                put_page(virt_to_page(spte));
  94        spin_unlock(&mm->page_table_lock);
  95out:
  96        spin_unlock(&mapping->i_mmap_lock);
  97}
  98
  99/*
 100 * unmap huge page backed by shared pte.
 101 *
 102 * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
 103 * indicated by page_count > 1, unmap is achieved by clearing pud and
 104 * decrementing the ref count. If count == 1, the pte page is not shared.
 105 *
 106 * called with vma->vm_mm->page_table_lock held.
 107 *
 108 * returns: 1 successfully unmapped a shared pte page
 109 *          0 the underlying pte page is not shared, or it is the last user
 110 */
 111int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 112{
 113        pgd_t *pgd = pgd_offset(mm, *addr);
 114        pud_t *pud = pud_offset(pgd, *addr);
 115
 116        BUG_ON(page_count(virt_to_page(ptep)) == 0);
 117        if (page_count(virt_to_page(ptep)) == 1)
 118                return 0;
 119
 120        pud_clear(pud);
 121        put_page(virt_to_page(ptep));
 122        *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
 123        return 1;
 124}
 125
 126pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 127{
 128        pgd_t *pgd;
 129        pud_t *pud;
 130        pte_t *pte = NULL;
 131
 132        pgd = pgd_offset(mm, addr);
 133        pud = pud_alloc(mm, pgd, addr);
 134        if (pud) {
 135                if (pud_none(*pud))
 136                        huge_pmd_share(mm, addr, pud);
 137                pte = (pte_t *) pmd_alloc(mm, pud, addr);
 138        }
 139        BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 140
 141        return pte;
 142}
 143
 144pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 145{
 146        pgd_t *pgd;
 147        pud_t *pud;
 148        pmd_t *pmd = NULL;
 149
 150        pgd = pgd_offset(mm, addr);
 151        if (pgd_present(*pgd)) {
 152                pud = pud_offset(pgd, addr);
 153                if (pud_present(*pud))
 154                        pmd = pmd_offset(pud, addr);
 155        }
 156        return (pte_t *) pmd;
 157}
 158
 159#if 0   /* This is just for testing */
 160struct page *
 161follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 162{
 163        unsigned long start = address;
 164        int length = 1;
 165        int nr;
 166        struct page *page;
 167        struct vm_area_struct *vma;
 168
 169        vma = find_vma(mm, addr);
 170        if (!vma || !is_vm_hugetlb_page(vma))
 171                return ERR_PTR(-EINVAL);
 172
 173        pte = huge_pte_offset(mm, address);
 174
 175        /* hugetlb should be locked, and hence, prefaulted */
 176        WARN_ON(!pte || pte_none(*pte));
 177
 178        page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 179
 180        WARN_ON(!PageCompound(page));
 181
 182        return page;
 183}
 184
 185int pmd_huge(pmd_t pmd)
 186{
 187        return 0;
 188}
 189
 190struct page *
 191follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 192                pmd_t *pmd, int write)
 193{
 194        return NULL;
 195}
 196
 197#else
 198
 199struct page *
 200follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 201{
 202        return ERR_PTR(-EINVAL);
 203}
 204
 205int pmd_huge(pmd_t pmd)
 206{
 207        return !!(pmd_val(pmd) & _PAGE_PSE);
 208}
 209
 210struct page *
 211follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 212                pmd_t *pmd, int write)
 213{
 214        struct page *page;
 215
 216        page = pte_page(*(pte_t *)pmd);
 217        if (page)
 218                page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
 219        return page;
 220}
 221#endif
 222
 223/* x86_64 also uses this file */
 224
 225#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 226static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 227                unsigned long addr, unsigned long len,
 228                unsigned long pgoff, unsigned long flags)
 229{
 230        struct mm_struct *mm = current->mm;
 231        struct vm_area_struct *vma;
 232        unsigned long start_addr;
 233
 234        if (len > mm->cached_hole_size) {
 235                start_addr = mm->free_area_cache;
 236        } else {
 237                start_addr = TASK_UNMAPPED_BASE;
 238                mm->cached_hole_size = 0;
 239        }
 240
 241full_search:
 242        addr = ALIGN(start_addr, HPAGE_SIZE);
 243
 244        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 245                /* At this point:  (!vma || addr < vma->vm_end). */
 246                if (TASK_SIZE - len < addr) {
 247                        /*
 248                         * Start a new search - just in case we missed
 249                         * some holes.
 250                         */
 251                        if (start_addr != TASK_UNMAPPED_BASE) {
 252                                start_addr = TASK_UNMAPPED_BASE;
 253                                mm->cached_hole_size = 0;
 254                                goto full_search;
 255                        }
 256                        return -ENOMEM;
 257                }
 258                if (!vma || addr + len <= vma->vm_start) {
 259                        mm->free_area_cache = addr + len;
 260                        return addr;
 261                }
 262                if (addr + mm->cached_hole_size < vma->vm_start)
 263                        mm->cached_hole_size = vma->vm_start - addr;
 264                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
 265        }
 266}
 267
 268static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 269                unsigned long addr0, unsigned long len,
 270                unsigned long pgoff, unsigned long flags)
 271{
 272        struct mm_struct *mm = current->mm;
 273        struct vm_area_struct *vma, *prev_vma;
 274        unsigned long base = mm->mmap_base, addr = addr0;
 275        unsigned long largest_hole = mm->cached_hole_size;
 276        int first_time = 1;
 277
 278        /* don't allow allocations above current base */
 279        if (mm->free_area_cache > base)
 280                mm->free_area_cache = base;
 281
 282        if (len <= largest_hole) {
 283                largest_hole = 0;
 284                mm->free_area_cache  = base;
 285        }
 286try_again:
 287        /* make sure it can fit in the remaining address space */
 288        if (mm->free_area_cache < len)
 289                goto fail;
 290
 291        /* either no address requested or cant fit in requested address hole */
 292        addr = (mm->free_area_cache - len) & HPAGE_MASK;
 293        do {
 294                /*
 295                 * Lookup failure means no vma is above this address,
 296                 * i.e. return with success:
 297                 */
 298                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
 299                        return addr;
 300
 301                /*
 302                 * new region fits between prev_vma->vm_end and
 303                 * vma->vm_start, use it:
 304                 */
 305                if (addr + len <= vma->vm_start &&
 306                            (!prev_vma || (addr >= prev_vma->vm_end))) {
 307                        /* remember the address as a hint for next time */
 308                        mm->cached_hole_size = largest_hole;
 309                        return (mm->free_area_cache = addr);
 310                } else {
 311                        /* pull free_area_cache down to the first hole */
 312                        if (mm->free_area_cache == vma->vm_end) {
 313                                mm->free_area_cache = vma->vm_start;
 314                                mm->cached_hole_size = largest_hole;
 315                        }
 316                }
 317
 318                /* remember the largest hole we saw so far */
 319                if (addr + largest_hole < vma->vm_start)
 320                        largest_hole = vma->vm_start - addr;
 321
 322                /* try just below the current vma->vm_start */
 323                addr = (vma->vm_start - len) & HPAGE_MASK;
 324        } while (len <= vma->vm_start);
 325
 326fail:
 327        /*
 328         * if hint left us with no space for the requested
 329         * mapping then try again:
 330         */
 331        if (first_time) {
 332                mm->free_area_cache = base;
 333                largest_hole = 0;
 334                first_time = 0;
 335                goto try_again;
 336        }
 337        /*
 338         * A failed mmap() very likely causes application failure,
 339         * so fall back to the bottom-up function here. This scenario
 340         * can happen with large stack limits and large mmap()
 341         * allocations.
 342         */
 343        mm->free_area_cache = TASK_UNMAPPED_BASE;
 344        mm->cached_hole_size = ~0UL;
 345        addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
 346                        len, pgoff, flags);
 347
 348        /*
 349         * Restore the topdown base:
 350         */
 351        mm->free_area_cache = base;
 352        mm->cached_hole_size = ~0UL;
 353
 354        return addr;
 355}
 356
 357unsigned long
 358hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 359                unsigned long len, unsigned long pgoff, unsigned long flags)
 360{
 361        struct mm_struct *mm = current->mm;
 362        struct vm_area_struct *vma;
 363
 364        if (len & ~HPAGE_MASK)
 365                return -EINVAL;
 366        if (len > TASK_SIZE)
 367                return -ENOMEM;
 368
 369        if (flags & MAP_FIXED) {
 370                if (prepare_hugepage_range(addr, len, pgoff))
 371                        return -EINVAL;
 372                return addr;
 373        }
 374
 375        if (addr) {
 376                addr = ALIGN(addr, HPAGE_SIZE);
 377                vma = find_vma(mm, addr);
 378                if (TASK_SIZE - len >= addr &&
 379                    (!vma || addr + len <= vma->vm_start))
 380                        return addr;
 381        }
 382        if (mm->get_unmapped_area == arch_get_unmapped_area)
 383                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
 384                                pgoff, flags);
 385        else
 386                return hugetlb_get_unmapped_area_topdown(file, addr, len,
 387                                pgoff, flags);
 388}
 389
 390#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
 391
 392
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.