linux-old/arch/ia64/mm/hugetlbpage.c
<<
>>
Prefs
   1/*
   2 * IA-64 Huge TLB Page Support for Kernel.
   3 *
   4 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
   5 */
   6
   7#include <linux/config.h>
   8#include <linux/init.h>
   9#include <linux/fs.h>
  10#include <linux/mm.h>
  11#include <linux/hugetlb.h>
  12#include <linux/pagemap.h>
  13#include <linux/smp_lock.h>
  14#include <linux/slab.h>
  15#include <linux/sysctl.h>
  16#include <asm/mman.h>
  17#include <asm/pgalloc.h>
  18#include <asm/tlb.h>
  19
  20
  21#define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)
  22
  23static long    htlbpagemem;
  24int     htlbpage_max;
  25static long    htlbzone_pages;
  26
  27struct vm_operations_struct hugetlb_vm_ops;
  28static LIST_HEAD(htlbpage_freelist);
  29static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
  30
  31static struct page *alloc_hugetlb_page(void)
  32{
  33        int i;
  34        struct page *page;
  35
  36        spin_lock(&htlbpage_lock);
  37        if (list_empty(&htlbpage_freelist)) {
  38                spin_unlock(&htlbpage_lock);
  39                return NULL;
  40        }
  41
  42        page = list_entry(htlbpage_freelist.next, struct page, list);
  43        list_del(&page->list);
  44        htlbpagemem--;
  45        spin_unlock(&htlbpage_lock);
  46        set_page_count(page, 1);
  47        for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
  48                clear_highpage(&page[i]);
  49        return page;
  50}
  51
  52static pte_t *
  53huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
  54{
  55        unsigned long taddr = htlbpage_to_page(addr);
  56        pgd_t *pgd;
  57        pmd_t *pmd;
  58        pte_t *pte = NULL;
  59
  60        pgd = pgd_offset(mm, taddr);
  61        pmd = pmd_alloc(mm, pgd, taddr);
  62        if (pmd)
  63                pte = pte_alloc(mm, pmd, taddr);
  64        return pte;
  65}
  66
  67static pte_t *
  68huge_pte_offset (struct mm_struct *mm, unsigned long addr)
  69{
  70        unsigned long taddr = htlbpage_to_page(addr);
  71        pgd_t *pgd;
  72        pmd_t *pmd;
  73        pte_t *pte = NULL;
  74
  75        pgd = pgd_offset(mm, taddr);
  76        if (pgd_present(*pgd)) {
  77                pmd = pmd_offset(pgd, taddr);
  78                if (pmd_present(*pmd))
  79                        pte = pte_offset(pmd, taddr);
  80        }
  81
  82        return pte;
  83}
  84
  85#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
  86
  87static void
  88set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
  89              struct page *page, pte_t * page_table, int write_access)
  90{
  91        pte_t entry;
  92
  93        mm->rss += (HPAGE_SIZE / PAGE_SIZE);
  94        if (write_access) {
  95                entry =
  96                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
  97        } else
  98                entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
  99        entry = pte_mkyoung(entry);
 100        mk_pte_huge(entry);
 101        set_pte(page_table, entry);
 102        return;
 103}
 104/*
 105 * This function checks for proper alignment of input addr and len parameters.
 106 */
 107int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 108{
 109        if (len & ~HPAGE_MASK)
 110                return -EINVAL;
 111        if (addr & ~HPAGE_MASK)
 112                return -EINVAL;
 113        if (REGION_NUMBER(addr) != REGION_HPAGE)
 114                return -EINVAL;
 115
 116        return 0;
 117}
 118/* This function checks if the address and address+len falls out of HugeTLB region.  It
 119 * return -EINVAL if any part of address range falls in HugeTLB region.
 120 */
 121int  is_invalid_hugepage_range(unsigned long addr, unsigned long len)
 122{
 123        if (REGION_NUMBER(addr) == REGION_HPAGE)
 124                return -EINVAL;
 125        if (REGION_NUMBER(addr+len) == REGION_HPAGE)
 126                return -EINVAL;
 127        return 0;
 128}
 129
 130/*
 131 * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
 132 * are hugetlb region specific.
 133 */
 134void hugetlb_free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
 135        unsigned long start, unsigned long end)
 136{
 137        unsigned long first = start & HUGETLB_PGDIR_MASK;
 138        unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
 139        unsigned long start_index, end_index;
 140
 141        if (!prev) {
 142                prev = mm->mmap;
 143                if (!prev)
 144                        goto no_mmaps;
 145                if (prev->vm_end > start) {
 146                        if (last > prev->vm_start)
 147                                last = prev->vm_start;
 148                        goto no_mmaps;
 149                }
 150        }
 151        for (;;) {
 152                struct vm_area_struct *next = prev->vm_next;
 153
 154                if (next) {
 155                        if (next->vm_start < start) {
 156                                prev = next;
 157                                continue;
 158                        }
 159                        if (last > next->vm_start)
 160                                last = next->vm_start;
 161                }
 162                if (prev->vm_end > first)
 163                        first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
 164                break;
 165        }
 166no_mmaps:
 167        if (last < first)
 168                return;
 169        /*
 170         * If the PGD bits are not consecutive in the virtual address, the
 171         * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
 172         */
 173        start_index = pgd_index(htlbpage_to_page(first));
 174        end_index = pgd_index(htlbpage_to_page(last));
 175        if (end_index > start_index) {
 176                clear_page_tables(mm, start_index, end_index - start_index);
 177                flush_tlb_pgtables(mm, first & HUGETLB_PGDIR_MASK,
 178                                   last & HUGETLB_PGDIR_MASK);
 179        }
 180}
 181
 182int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 183                        struct vm_area_struct *vma)
 184{
 185        pte_t *src_pte, *dst_pte, entry;
 186        struct page *ptepage;
 187        unsigned long addr = vma->vm_start;
 188        unsigned long end = vma->vm_end;
 189
 190        while (addr < end) {
 191                dst_pte = huge_pte_alloc(dst, addr);
 192                if (!dst_pte)
 193                        goto nomem;
 194                src_pte = huge_pte_offset(src, addr);
 195                entry = *src_pte;
 196                ptepage = pte_page(entry);
 197                get_page(ptepage);
 198                set_pte(dst_pte, entry);
 199                dst->rss += (HPAGE_SIZE / PAGE_SIZE);
 200                addr += HPAGE_SIZE;
 201        }
 202        return 0;
 203nomem:
 204        return -ENOMEM;
 205}
 206
 207int
 208follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 209                    struct page **pages, struct vm_area_struct **vmas,
 210                    unsigned long *st, int *length, int i)
 211{
 212        pte_t *ptep, pte;
 213        unsigned long start = *st;
 214        unsigned long pstart;
 215        int len = *length;
 216        struct page *page;
 217
 218        do {
 219                pstart = start;
 220                ptep = huge_pte_offset(mm, start);
 221                pte = *ptep;
 222
 223back1:
 224                page = pte_page(pte);
 225                if (pages) {
 226                        page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
 227                        pages[i] = page;
 228                }
 229                if (vmas)
 230                        vmas[i] = vma;
 231                i++;
 232                len--;
 233                start += PAGE_SIZE;
 234                if (((start & HPAGE_MASK) == pstart) && len &&
 235                                (start < vma->vm_end))
 236                        goto back1;
 237        } while (len && start < vma->vm_end);
 238        *length = len;
 239        *st = start;
 240        return i;
 241}
 242
 243void free_huge_page(struct page *page)
 244{
 245        BUG_ON(page_count(page));
 246        BUG_ON(page->mapping);
 247
 248        INIT_LIST_HEAD(&page->list);
 249
 250        spin_lock(&htlbpage_lock);
 251        list_add(&page->list, &htlbpage_freelist);
 252        htlbpagemem++;
 253        spin_unlock(&htlbpage_lock);
 254}
 255
 256void huge_page_release(struct page *page)
 257{
 258        if (!put_page_testzero(page))
 259                return;
 260
 261        free_huge_page(page);
 262}
 263
 264void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 265{
 266        struct mm_struct *mm = vma->vm_mm;
 267        unsigned long address;
 268        pte_t *pte;
 269        struct page *page;
 270
 271        BUG_ON(start & (HPAGE_SIZE - 1));
 272        BUG_ON(end & (HPAGE_SIZE - 1));
 273
 274        for (address = start; address < end; address += HPAGE_SIZE) {
 275                pte = huge_pte_offset(mm, address);
 276                if (!pte || pte_none(*pte))
 277                        continue;
 278                page = pte_page(*pte);
 279                huge_page_release(page);
 280                pte_clear(pte);
 281        }
 282        mm->rss -= (end - start) >> PAGE_SHIFT;
 283        flush_tlb_range(mm, start, end);
 284}
 285
 286void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length)
 287{
 288        struct mm_struct *mm = vma->vm_mm;
 289        spin_lock(&mm->page_table_lock);
 290        unmap_hugepage_range(vma, start, start + length);
 291        spin_unlock(&mm->page_table_lock);
 292}
 293
 294int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 295{
 296        struct mm_struct *mm = current->mm;
 297        struct inode *inode = mapping->host;
 298        unsigned long addr;
 299        int ret = 0;
 300
 301        BUG_ON(vma->vm_start & ~HPAGE_MASK);
 302        BUG_ON(vma->vm_end & ~HPAGE_MASK);
 303
 304        spin_lock(&mm->page_table_lock);
 305        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
 306                unsigned long idx;
 307                pte_t *pte = huge_pte_alloc(mm, addr);
 308                struct page *page;
 309
 310                if (!pte) {
 311                        ret = -ENOMEM;
 312                        goto out;
 313                }
 314                if (!pte_none(*pte))
 315                        continue;
 316
 317                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
 318                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
 319                page = find_get_page(mapping, idx);
 320                if (!page) {
 321                        /* charge the fs quota first */
 322                        if (hugetlb_get_quota(mapping)) {
 323                                ret = -ENOMEM;
 324                                goto out;
 325                        }
 326                        page = alloc_hugetlb_page();
 327                        if (!page) {
 328                                hugetlb_put_quota(mapping);
 329                                ret = -ENOMEM;
 330                                goto out;
 331                        }
 332                        add_to_page_cache(page, mapping, idx);
 333                        unlock_page(page);
 334                }
 335                set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
 336        }
 337out:
 338        spin_unlock(&mm->page_table_lock);
 339        return ret;
 340}
 341
 342unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 343                unsigned long pgoff, unsigned long flags)
 344{
 345        struct vm_area_struct *vmm;
 346
 347        if (len > RGN_MAP_LIMIT)
 348                return -ENOMEM;
 349        if (len & ~HPAGE_MASK)
 350                return -EINVAL;
 351        /* This code assumes that REGION_HPAGE != 0. */
 352        if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
 353                addr = TASK_HPAGE_BASE;
 354        else
 355                addr = COLOR_HALIGN(addr);
 356        for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
 357                /* At this point:  (!vmm || addr < vmm->vm_end). */
 358                if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
 359                        return -ENOMEM;
 360                if (!vmm || (addr + len) <= vmm->vm_start)
 361                        return addr;
 362                addr = COLOR_HALIGN(vmm->vm_end);
 363        }
 364}
 365void update_and_free_page(struct page *page)
 366{
 367        int j;
 368        struct page *map;
 369
 370        map = page;
 371        htlbzone_pages--;
 372        for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
 373                map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 374                                1 << PG_dirty | 1 << PG_active | 1 << PG_reserved);
 375                set_page_count(map, 0);
 376                map++;
 377        }
 378        set_page_count(page, 1);
 379        __free_pages(page, HUGETLB_PAGE_ORDER);
 380}
 381
 382int try_to_free_low(int count)
 383{
 384        struct list_head *p;
 385        struct page *page, *map;
 386
 387        map = NULL;
 388        spin_lock(&htlbpage_lock);
 389        list_for_each(p, &htlbpage_freelist) {
 390                if (map) {
 391                        list_del(&map->list);
 392                        update_and_free_page(map);
 393                        htlbpagemem--;
 394                        map = NULL;
 395                        if (++count == 0)
 396                                break;
 397                }
 398                page = list_entry(p, struct page, list);
 399                if ((page_zone(page))->name[0] != 'H') //Look for non-Highmem zones.
 400                        map = page;
 401        }
 402        if (map) {
 403                list_del(&map->list);
 404                update_and_free_page(map);
 405                htlbpagemem--;
 406                count++;
 407        }
 408        spin_unlock(&htlbpage_lock);
 409        return count;
 410}
 411
 412int set_hugetlb_mem_size(int count)
 413{
 414        int j, lcount;
 415        struct page *page, *map;
 416
 417        if (count < 0)
 418                lcount = count;
 419        else
 420                lcount = count - htlbzone_pages;
 421
 422        if (lcount == 0)
 423                return (int)htlbzone_pages;
 424        if (lcount > 0) {       /* Increase the mem size. */
 425                while (lcount--) {
 426                        page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 427                        if (page == NULL)
 428                                break;
 429                        map = page;
 430                        for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
 431                                SetPageReserved(map);
 432                                map++;
 433                        }
 434                        spin_lock(&htlbpage_lock);
 435                        list_add(&page->list, &htlbpage_freelist);
 436                        htlbpagemem++;
 437                        htlbzone_pages++;
 438                        spin_unlock(&htlbpage_lock);
 439                }
 440                return (int) htlbzone_pages;
 441        }
 442        /* Shrink the memory size. */
 443        lcount = try_to_free_low(lcount);
 444        while (lcount++ < 0) {
 445                page = alloc_hugetlb_page();
 446                if (page == NULL)
 447                        break;
 448                spin_lock(&htlbpage_lock);
 449                update_and_free_page(page);
 450                spin_unlock(&htlbpage_lock);
 451        }
 452        return (int) htlbzone_pages;
 453}
 454
 455int hugetlb_sysctl_handler(ctl_table *table, int write, struct file *file, void *buffer, size_t *length)
 456{
 457        proc_dointvec(table, write, file, buffer, length);
 458        htlbpage_max = set_hugetlb_mem_size(htlbpage_max);
 459        return 0;
 460}
 461
 462static int __init hugetlb_setup(char *s)
 463{
 464        if (sscanf(s, "%d", &htlbpage_max) <= 0)
 465                htlbpage_max = 0;
 466        return 1;
 467}
 468__setup("hugepages=", hugetlb_setup);
 469
 470static int __init hugetlb_init(void)
 471{
 472        int i, j;
 473        struct page *page;
 474
 475        for (i = 0; i < htlbpage_max; ++i) {
 476                page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 477                if (!page)
 478                        break;
 479                for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j)
 480                        SetPageReserved(&page[j]);
 481                spin_lock(&htlbpage_lock);
 482                list_add(&page->list, &htlbpage_freelist);
 483                spin_unlock(&htlbpage_lock);
 484        }
 485        htlbpage_max = htlbpagemem = htlbzone_pages = i;
 486        printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
 487        return 0;
 488}
 489module_init(hugetlb_init);
 490
 491int hugetlb_report_meminfo(char *buf)
 492{
 493        return sprintf(buf,
 494                        "HugePages_Total: %5lu\n"
 495                        "HugePages_Free:  %5lu\n"
 496                        "Hugepagesize:    %5lu kB\n",
 497                        htlbzone_pages,
 498                        htlbpagemem,
 499                        HPAGE_SIZE/1024);
 500}
 501
 502int is_hugepage_mem_enough(size_t size)
 503{
 504        if (size > (htlbpagemem << HPAGE_SHIFT))
 505                return 0;
 506        return 1;
 507}
 508
 509static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused)
 510{
 511        BUG();
 512        return NULL;
 513}
 514
 515struct vm_operations_struct hugetlb_vm_ops = {
 516        .nopage =       hugetlb_nopage,
 517};
 518
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.