linux/mm/vmalloc.c
   1/*
   2 *  linux/mm/vmalloc.c
   3 *
   4 *  Copyright (C) 1993  Linus Torvalds
   5 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   6 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   7 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   8 *  Numa awareness, Christoph Lameter, SGI, June 2005
   9 */
  10
  11#include <linux/mm.h>
  12#include <linux/module.h>
  13#include <linux/highmem.h>
  14#include <linux/slab.h>
  15#include <linux/spinlock.h>
  16#include <linux/interrupt.h>
  17#include <linux/seq_file.h>
  18#include <linux/debugobjects.h>
  19#include <linux/vmalloc.h>
  20#include <linux/kallsyms.h>
  21
  22#include <asm/uaccess.h>
  23#include <asm/tlbflush.h>
  24
  25
  26DEFINE_RWLOCK(vmlist_lock);
  27struct vm_struct *vmlist;
  28
  29static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
  30                            int node, void *caller);
  31
  32static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
  33{
  34        pte_t *pte;
  35
  36        pte = pte_offset_kernel(pmd, addr);
  37        do {
  38                pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
  39                WARN_ON(!pte_none(ptent) && !pte_present(ptent));
  40        } while (pte++, addr += PAGE_SIZE, addr != end);
  41}
  42
  43static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
  44                                                unsigned long end)
  45{
  46        pmd_t *pmd;
  47        unsigned long next;
  48
  49        pmd = pmd_offset(pud, addr);
  50        do {
  51                next = pmd_addr_end(addr, end);
  52                if (pmd_none_or_clear_bad(pmd))
  53                        continue;
  54                vunmap_pte_range(pmd, addr, next);
  55        } while (pmd++, addr = next, addr != end);
  56}
  57
  58static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
  59                                                unsigned long end)
  60{
  61        pud_t *pud;
  62        unsigned long next;
  63
  64        pud = pud_offset(pgd, addr);
  65        do {
  66                next = pud_addr_end(addr, end);
  67                if (pud_none_or_clear_bad(pud))
  68                        continue;
  69                vunmap_pmd_range(pud, addr, next);
  70        } while (pud++, addr = next, addr != end);
  71}
  72
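/*
 * Tear down the kernel page table entries covering [addr, addr + size):
 * the cache is flushed before and the TLB after the PTEs are cleared.
 * Callers must make sure nothing still uses the mapping.
 */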
  73void unmap_kernel_range(unsigned long addr, unsigned long size)
  74{
  75        pgd_t *pgd;
  76        unsigned long next;
  77        unsigned long start = addr;
  78        unsigned long end = addr + size;
  79
  80        BUG_ON(addr >= end);
  81        pgd = pgd_offset_k(addr);
  82        flush_cache_vunmap(addr, end);
  83        do {
  84                next = pgd_addr_end(addr, end);
  85                if (pgd_none_or_clear_bad(pgd))
  86                        continue;
  87                vunmap_pud_range(pgd, addr, next);
  88        } while (pgd++, addr = next, addr != end);
  89        flush_tlb_kernel_range(start, end);
  90}
  91
  92static void unmap_vm_area(struct vm_struct *area)
  93{
  94        unmap_kernel_range((unsigned long)area->addr, area->size);
  95}
  96
  97static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
  98                        unsigned long end, pgprot_t prot, struct page ***pages)
  99{
 100        pte_t *pte;
 101
 102        pte = pte_alloc_kernel(pmd, addr);
 103        if (!pte)
 104                return -ENOMEM;
 105        do {
 106                struct page *page = **pages;
 107                WARN_ON(!pte_none(*pte));
 108                if (!page)
 109                        return -ENOMEM;
 110                set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
 111                (*pages)++;
 112        } while (pte++, addr += PAGE_SIZE, addr != end);
 113        return 0;
 114}
 115
 116static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
 117                        unsigned long end, pgprot_t prot, struct page ***pages)
 118{
 119        pmd_t *pmd;
 120        unsigned long next;
 121
 122        pmd = pmd_alloc(&init_mm, pud, addr);
 123        if (!pmd)
 124                return -ENOMEM;
 125        do {
 126                next = pmd_addr_end(addr, end);
 127                if (vmap_pte_range(pmd, addr, next, prot, pages))
 128                        return -ENOMEM;
 129        } while (pmd++, addr = next, addr != end);
 130        return 0;
 131}
 132
 133static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 134                        unsigned long end, pgprot_t prot, struct page ***pages)
 135{
 136        pud_t *pud;
 137        unsigned long next;
 138
 139        pud = pud_alloc(&init_mm, pgd, addr);
 140        if (!pud)
 141                return -ENOMEM;
 142        do {
 143                next = pud_addr_end(addr, end);
 144                if (vmap_pmd_range(pud, addr, next, prot, pages))
 145                        return -ENOMEM;
 146        } while (pud++, addr = next, addr != end);
 147        return 0;
 148}
 149
 150int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 151{
 152        pgd_t *pgd;
 153        unsigned long next;
 154        unsigned long addr = (unsigned long) area->addr;
 155        unsigned long end = addr + area->size - PAGE_SIZE;
 156        int err;
 157
 158        BUG_ON(addr >= end);
 159        pgd = pgd_offset_k(addr);
 160        do {
 161                next = pgd_addr_end(addr, end);
 162                err = vmap_pud_range(pgd, addr, next, prot, pages);
 163                if (err)
 164                        break;
 165        } while (pgd++, addr = next, addr != end);
 166        flush_cache_vmap((unsigned long) area->addr, end);
 167        return err;
 168}
 169EXPORT_SYMBOL_GPL(map_vm_area);
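 
/*
 * Usage sketch (illustrative only, not part of the original file): mapping
 * a caller-provided page array into a freshly reserved area.  The helper
 * name demo_map_pages() is hypothetical.
 */
#if 0
static void *demo_map_pages(struct page **pages, unsigned int count)
{
        struct vm_struct *area;
        struct page **cursor = pages;   /* map_vm_area() advances this */

        area = get_vm_area(count << PAGE_SHIFT, VM_MAP);
        if (!area)
                return NULL;
        if (map_vm_area(area, PAGE_KERNEL, &cursor)) {
                vunmap(area->addr);     /* also removes the vm_struct */
                return NULL;
        }
        return area->addr;
}
#endif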
 170
 171/*
 172 * Map a vmalloc()-space virtual address to the physical page.
 173 */
 174struct page *vmalloc_to_page(const void *vmalloc_addr)
 175{
 176        unsigned long addr = (unsigned long) vmalloc_addr;
 177        struct page *page = NULL;
 178        pgd_t *pgd = pgd_offset_k(addr);
 179        pud_t *pud;
 180        pmd_t *pmd;
 181        pte_t *ptep, pte;
 182
 183        if (!pgd_none(*pgd)) {
 184                pud = pud_offset(pgd, addr);
 185                if (!pud_none(*pud)) {
 186                        pmd = pmd_offset(pud, addr);
 187                        if (!pmd_none(*pmd)) {
 188                                ptep = pte_offset_map(pmd, addr);
 189                                pte = *ptep;
 190                                if (pte_present(pte))
 191                                        page = pte_page(pte);
 192                                pte_unmap(ptep);
 193                        }
 194                }
 195        }
 196        return page;
 197}
 198EXPORT_SYMBOL(vmalloc_to_page);
 199
 200/*
 201 * Map a vmalloc()-space virtual address to the physical page frame number.
 202 */
 203unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 204{
 205        return page_to_pfn(vmalloc_to_page(vmalloc_addr));
 206}
 207EXPORT_SYMBOL(vmalloc_to_pfn);
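 
/*
 * Usage sketch (illustrative): looking up the backing page and page frame
 * number of a vmalloc()ed address.  demo_inspect() is a hypothetical
 * helper, not an in-tree function.
 */
#if 0
static void demo_inspect(const void *vaddr)
{
        struct page *page = vmalloc_to_page(vaddr);
        unsigned long pfn = vmalloc_to_pfn(vaddr);

        printk(KERN_DEBUG "%p is backed by page %p (pfn %lu)\n",
               vaddr, page, pfn);
}
#endif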
 208
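/*
 * Reserve a hole of at least @size bytes (page aligned, plus one guard
 * page) between @start and @end, allocate its vm_struct and link it into
 * the address-sorted vmlist.  Returns NULL if no suitable hole exists.
 */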
 209static struct vm_struct *
 210__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 211                unsigned long end, int node, gfp_t gfp_mask, void *caller)
 212{
 213        struct vm_struct **p, *tmp, *area;
 214        unsigned long align = 1;
 215        unsigned long addr;
 216
 217        BUG_ON(in_interrupt());
 218        if (flags & VM_IOREMAP) {
 219                int bit = fls(size);
 220
 221                if (bit > IOREMAP_MAX_ORDER)
 222                        bit = IOREMAP_MAX_ORDER;
 223                else if (bit < PAGE_SHIFT)
 224                        bit = PAGE_SHIFT;
 225
 226                align = 1ul << bit;
 227        }
 228        addr = ALIGN(start, align);
 229        size = PAGE_ALIGN(size);
 230        if (unlikely(!size))
 231                return NULL;
 232
 233        area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
 234
 235        if (unlikely(!area))
 236                return NULL;
 237
 238        /*
 239         * We always allocate a guard page.
 240         */
 241        size += PAGE_SIZE;
 242
 243        write_lock(&vmlist_lock);
  244        for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
  245                if ((unsigned long)tmp->addr < addr) {
  246                        if ((unsigned long)tmp->addr + tmp->size >= addr)
  247                                addr = ALIGN(tmp->size +
  248                                             (unsigned long)tmp->addr, align);
 249                        continue;
 250                }
 251                if ((size + addr) < addr)
 252                        goto out;
 253                if (size + addr <= (unsigned long)tmp->addr)
 254                        goto found;
 255                addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
 256                if (addr > end - size)
 257                        goto out;
 258        }
 259        if ((size + addr) < addr)
 260                goto out;
 261        if (addr > end - size)
 262                goto out;
 263
 264found:
 265        area->next = *p;
 266        *p = area;
 267
 268        area->flags = flags;
 269        area->addr = (void *)addr;
 270        area->size = size;
 271        area->pages = NULL;
 272        area->nr_pages = 0;
 273        area->phys_addr = 0;
 274        area->caller = caller;
 275        write_unlock(&vmlist_lock);
 276
 277        return area;
 278
 279out:
 280        write_unlock(&vmlist_lock);
 281        kfree(area);
 282        if (printk_ratelimit())
 283                printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
 284        return NULL;
 285}
 286
 287struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 288                                unsigned long start, unsigned long end)
 289{
 290        return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
 291                                                __builtin_return_address(0));
 292}
 293EXPORT_SYMBOL_GPL(__get_vm_area);
 294
 295/**
 296 *      get_vm_area  -  reserve a contiguous kernel virtual area
 297 *      @size:          size of the area
  298 *      @flags:         %VM_IOREMAP for I/O mappings or %VM_ALLOC
 299 *
 300 *      Search an area of @size in the kernel virtual mapping area,
  301 *      and reserve it for our purposes.  Returns the area descriptor
 302 *      on success or %NULL on failure.
 303 */
 304struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 305{
 306        return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
 307                                -1, GFP_KERNEL, __builtin_return_address(0));
 308}
 309
 310struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 311                                void *caller)
 312{
 313        return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
 314                                                -1, GFP_KERNEL, caller);
 315}
 316
 317struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
 318                                   int node, gfp_t gfp_mask)
 319{
 320        return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
 321                                  gfp_mask, __builtin_return_address(0));
 322}
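 
/*
 * Usage sketch (illustrative): reserving kernel virtual space without
 * backing it with pages, as an ioremap()-style caller would.  Note that
 * area->size includes the guard page added by __get_vm_area_node().
 * demo_reserve() is a hypothetical name.
 */
#if 0
static struct vm_struct *demo_reserve(unsigned long size)
{
        struct vm_struct *area = get_vm_area(size, VM_IOREMAP);

        if (!area)
                return NULL;
        /* [area->addr, area->addr + area->size - PAGE_SIZE) is usable */
        return area;
}
#endif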
 323
 324/* Caller must hold vmlist_lock */
 325static struct vm_struct *__find_vm_area(const void *addr)
 326{
 327        struct vm_struct *tmp;
 328
 329        for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
  330                if (tmp->addr == addr)
  331                        break;
 332        }
 333
 334        return tmp;
 335}
 336
 337/* Caller must hold vmlist_lock */
 338static struct vm_struct *__remove_vm_area(const void *addr)
 339{
 340        struct vm_struct **p, *tmp;
 341
  342        for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
  343                if (tmp->addr == addr)
  344                        goto found;
 345        }
 346        return NULL;
 347
 348found:
 349        unmap_vm_area(tmp);
 350        *p = tmp->next;
 351
 352        /*
 353         * Remove the guard page.
 354         */
 355        tmp->size -= PAGE_SIZE;
 356        return tmp;
 357}
 358
 359/**
  360 *      remove_vm_area  -  find and remove a contiguous kernel virtual area
 361 *      @addr:          base address
 362 *
 363 *      Search for the kernel VM area starting at @addr, and remove it.
 364 *      This function returns the found VM area, but using it is NOT safe
 365 *      on SMP machines, except for its size or flags.
 366 */
 367struct vm_struct *remove_vm_area(const void *addr)
 368{
 369        struct vm_struct *v;
 370        write_lock(&vmlist_lock);
 371        v = __remove_vm_area(addr);
 372        write_unlock(&vmlist_lock);
 373        return v;
 374}
 375
 376static void __vunmap(const void *addr, int deallocate_pages)
 377{
 378        struct vm_struct *area;
 379
 380        if (!addr)
 381                return;
 382
 383        if ((PAGE_SIZE-1) & (unsigned long)addr) {
 384                WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 385                return;
 386        }
 387
 388        area = remove_vm_area(addr);
 389        if (unlikely(!area)) {
 390                WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 391                                addr);
 392                return;
 393        }
 394
 395        debug_check_no_locks_freed(addr, area->size);
 396        debug_check_no_obj_freed(addr, area->size);
 397
 398        if (deallocate_pages) {
 399                int i;
 400
 401                for (i = 0; i < area->nr_pages; i++) {
 402                        struct page *page = area->pages[i];
 403
 404                        BUG_ON(!page);
 405                        __free_page(page);
 406                }
 407
 408                if (area->flags & VM_VPAGES)
 409                        vfree(area->pages);
 410                else
 411                        kfree(area->pages);
 412        }
 413
 414        kfree(area);
 415        return;
 416}
 417
 418/**
 419 *      vfree  -  release memory allocated by vmalloc()
 420 *      @addr:          memory base address
 421 *
  422 *      Free the virtually contiguous memory area starting at @addr, as
 423 *      obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 424 *      NULL, no operation is performed.
 425 *
 426 *      Must not be called in interrupt context.
 427 */
 428void vfree(const void *addr)
 429{
 430        BUG_ON(in_interrupt());
 431        __vunmap(addr, 1);
 432}
 433EXPORT_SYMBOL(vfree);
 434
 435/**
 436 *      vunmap  -  release virtual mapping obtained by vmap()
 437 *      @addr:          memory base address
 438 *
 439 *      Free the virtually contiguous memory area starting at @addr,
 440 *      which was created from the page array passed to vmap().
 441 *
 442 *      Must not be called in interrupt context.
 443 */
 444void vunmap(const void *addr)
 445{
 446        BUG_ON(in_interrupt());
 447        __vunmap(addr, 0);
 448}
 449EXPORT_SYMBOL(vunmap);
 450
 451/**
 452 *      vmap  -  map an array of pages into virtually contiguous space
 453 *      @pages:         array of page pointers
 454 *      @count:         number of pages to map
 455 *      @flags:         vm_area->flags
 456 *      @prot:          page protection for the mapping
 457 *
 458 *      Maps @count pages from @pages into contiguous kernel virtual
 459 *      space.
 460 */
 461void *vmap(struct page **pages, unsigned int count,
 462                unsigned long flags, pgprot_t prot)
 463{
 464        struct vm_struct *area;
 465
 466        if (count > num_physpages)
 467                return NULL;
 468
 469        area = get_vm_area_caller((count << PAGE_SHIFT), flags,
 470                                        __builtin_return_address(0));
 471        if (!area)
 472                return NULL;
 473
 474        if (map_vm_area(area, prot, &pages)) {
 475                vunmap(area->addr);
 476                return NULL;
 477        }
 478
 479        return area->addr;
 480}
 481EXPORT_SYMBOL(vmap);
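 
/*
 * Usage sketch (illustrative): stitching separately allocated pages into
 * one contiguous kernel mapping and tearing it down again.  The demo_*
 * names are hypothetical; the pages are assumed to come from alloc_page().
 */
#if 0
static void *demo_vmap(struct page **pages, unsigned int count)
{
        /* The page array is only read here; vmap() does not keep it. */
        return vmap(pages, count, VM_MAP, PAGE_KERNEL);
}

static void demo_vunmap(void *addr, struct page **pages, unsigned int count)
{
        unsigned int i;

        vunmap(addr);                   /* removes the mapping only */
        for (i = 0; i < count; i++)
                __free_page(pages[i]);  /* the pages remain ours to free */
}
#endif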
 482
 483static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 484                                 pgprot_t prot, int node, void *caller)
 485{
 486        struct page **pages;
 487        unsigned int nr_pages, array_size, i;
 488
 489        nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 490        array_size = (nr_pages * sizeof(struct page *));
 491
 492        area->nr_pages = nr_pages;
 493        /* Please note that the recursion is strictly bounded. */
 494        if (array_size > PAGE_SIZE) {
 495                pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
 496                                PAGE_KERNEL, node, caller);
 497                area->flags |= VM_VPAGES;
 498        } else {
 499                pages = kmalloc_node(array_size,
 500                                (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
 501                                node);
 502        }
 503        area->pages = pages;
 504        area->caller = caller;
 505        if (!area->pages) {
 506                remove_vm_area(area->addr);
 507                kfree(area);
 508                return NULL;
 509        }
 510
 511        for (i = 0; i < area->nr_pages; i++) {
 512                struct page *page;
 513
 514                if (node < 0)
 515                        page = alloc_page(gfp_mask);
 516                else
 517                        page = alloc_pages_node(node, gfp_mask, 0);
 518
 519                if (unlikely(!page)) {
 520                        /* Successfully allocated i pages, free them in __vunmap() */
 521                        area->nr_pages = i;
 522                        goto fail;
 523                }
 524                area->pages[i] = page;
 525        }
 526
 527        if (map_vm_area(area, prot, &pages))
 528                goto fail;
 529        return area->addr;
 530
 531fail:
 532        vfree(area->addr);
 533        return NULL;
 534}
 535
 536void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 537{
 538        return __vmalloc_area_node(area, gfp_mask, prot, -1,
 539                                        __builtin_return_address(0));
 540}
 541
 542/**
 543 *      __vmalloc_node  -  allocate virtually contiguous memory
 544 *      @size:          allocation size
 545 *      @gfp_mask:      flags for the page level allocator
 546 *      @prot:          protection mask for the allocated pages
 547 *      @node:          node to use for allocation or -1
 548 *      @caller:        caller's return address
 549 *
 550 *      Allocate enough pages to cover @size from the page level
 551 *      allocator with @gfp_mask flags.  Map them into contiguous
 552 *      kernel virtual space, using a pagetable protection of @prot.
 553 */
 554static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 555                                                int node, void *caller)
 556{
 557        struct vm_struct *area;
 558
 559        size = PAGE_ALIGN(size);
 560        if (!size || (size >> PAGE_SHIFT) > num_physpages)
 561                return NULL;
 562
 563        area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
 564                                                node, gfp_mask, caller);
 565
 566        if (!area)
 567                return NULL;
 568
 569        return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
 570}
 571
 572void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 573{
 574        return __vmalloc_node(size, gfp_mask, prot, -1,
 575                                __builtin_return_address(0));
 576}
 577EXPORT_SYMBOL(__vmalloc);
 578
 579/**
 580 *      vmalloc  -  allocate virtually contiguous memory
 581 *      @size:          allocation size
 582 *      Allocate enough pages to cover @size from the page level
 583 *      allocator and map them into contiguous kernel virtual space.
 584 *
 585 *      For tight control over page level allocator and protection flags
 586 *      use __vmalloc() instead.
 587 */
 588void *vmalloc(unsigned long size)
 589{
 590        return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
 591                                        -1, __builtin_return_address(0));
 592}
 593EXPORT_SYMBOL(vmalloc);
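 
/*
 * Usage sketch (illustrative): a typical large, virtually contiguous
 * allocation.  demo_alloc() is a hypothetical caller; the memory is
 * contiguous in virtual space only, so it must not be handed to DMA as
 * one physical buffer.
 */
#if 0
static int demo_alloc(void)
{
        char *buf = vmalloc(128 * 1024);

        if (!buf)
                return -ENOMEM;
        memset(buf, 0, 128 * 1024);
        /* ... use buf ... */
        vfree(buf);
        return 0;
}
#endif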
 594
 595/**
 596 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 597 * @size: allocation size
 598 *
 599 * The resulting memory area is zeroed so it can be mapped to userspace
 600 * without leaking data.
 601 */
 602void *vmalloc_user(unsigned long size)
 603{
 604        struct vm_struct *area;
 605        void *ret;
 606
 607        ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 608        if (ret) {
 609                write_lock(&vmlist_lock);
 610                area = __find_vm_area(ret);
 611                area->flags |= VM_USERMAP;
 612                write_unlock(&vmlist_lock);
 613        }
 614        return ret;
 615}
 616EXPORT_SYMBOL(vmalloc_user);
 617
 618/**
 619 *      vmalloc_node  -  allocate memory on a specific node
 620 *      @size:          allocation size
 621 *      @node:          numa node
 622 *
 623 *      Allocate enough pages to cover @size from the page level
 624 *      allocator and map them into contiguous kernel virtual space.
 625 *
 626 *      For tight control over page level allocator and protection flags
 627 *      use __vmalloc() instead.
 628 */
 629void *vmalloc_node(unsigned long size, int node)
 630{
 631        return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
 632                                        node, __builtin_return_address(0));
 633}
 634EXPORT_SYMBOL(vmalloc_node);
 635
 636#ifndef PAGE_KERNEL_EXEC
 637# define PAGE_KERNEL_EXEC PAGE_KERNEL
 638#endif
 639
 640/**
 641 *      vmalloc_exec  -  allocate virtually contiguous, executable memory
 642 *      @size:          allocation size
 643 *
  644 *      Kernel-internal function to allocate enough pages to cover @size
  645 *      from the page level allocator and map them into contiguous and
 646 *      executable kernel virtual space.
 647 *
 648 *      For tight control over page level allocator and protection flags
 649 *      use __vmalloc() instead.
 650 */
 651
 652void *vmalloc_exec(unsigned long size)
 653{
 654        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
 655}
 656
 657#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
  658#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
 659#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
  660#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
 661#else
 662#define GFP_VMALLOC32 GFP_KERNEL
 663#endif
 664
 665/**
 666 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 667 *      @size:          allocation size
 668 *
 669 *      Allocate enough 32bit PA addressable pages to cover @size from the
 670 *      page level allocator and map them into contiguous kernel virtual space.
 671 */
 672void *vmalloc_32(unsigned long size)
 673{
 674        return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL);
 675}
 676EXPORT_SYMBOL(vmalloc_32);
 677
 678/**
 679 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 680 *      @size:          allocation size
 681 *
 682 * The resulting memory area is 32bit addressable and zeroed so it can be
 683 * mapped to userspace without leaking data.
 684 */
 685void *vmalloc_32_user(unsigned long size)
 686{
 687        struct vm_struct *area;
 688        void *ret;
 689
 690        ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
 691        if (ret) {
 692                write_lock(&vmlist_lock);
 693                area = __find_vm_area(ret);
 694                area->flags |= VM_USERMAP;
 695                write_unlock(&vmlist_lock);
 696        }
 697        return ret;
 698}
 699EXPORT_SYMBOL(vmalloc_32_user);
 700
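/*
 * Copy a range of vmalloc space into a kernel buffer.  Holes below and
 * between vm areas read back as zero bytes, each area's trailing guard
 * page is skipped, and nothing past the last area is copied.  Returns the
 * number of bytes placed in @buf.
 */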
 701long vread(char *buf, char *addr, unsigned long count)
 702{
 703        struct vm_struct *tmp;
 704        char *vaddr, *buf_start = buf;
 705        unsigned long n;
 706
 707        /* Don't allow overflow */
 708        if ((unsigned long) addr + count < count)
 709                count = -(unsigned long) addr;
 710
 711        read_lock(&vmlist_lock);
 712        for (tmp = vmlist; tmp; tmp = tmp->next) {
 713                vaddr = (char *) tmp->addr;
 714                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 715                        continue;
 716                while (addr < vaddr) {
 717                        if (count == 0)
 718                                goto finished;
 719                        *buf = '\0';
 720                        buf++;
 721                        addr++;
 722                        count--;
 723                }
 724                n = vaddr + tmp->size - PAGE_SIZE - addr;
 725                do {
 726                        if (count == 0)
 727                                goto finished;
 728                        *buf = *addr;
 729                        buf++;
 730                        addr++;
 731                        count--;
 732                } while (--n > 0);
 733        }
 734finished:
 735        read_unlock(&vmlist_lock);
 736        return buf - buf_start;
 737}
 738
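/*
 * Copy a kernel buffer into a range of vmalloc space.  Bytes aimed at
 * holes between vm areas are consumed from @buf but silently dropped, and
 * guard pages are never written.  Returns the number of bytes consumed.
 */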
 739long vwrite(char *buf, char *addr, unsigned long count)
 740{
 741        struct vm_struct *tmp;
 742        char *vaddr, *buf_start = buf;
 743        unsigned long n;
 744
 745        /* Don't allow overflow */
 746        if ((unsigned long) addr + count < count)
 747                count = -(unsigned long) addr;
 748
 749        read_lock(&vmlist_lock);
 750        for (tmp = vmlist; tmp; tmp = tmp->next) {
 751                vaddr = (char *) tmp->addr;
 752                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 753                        continue;
 754                while (addr < vaddr) {
 755                        if (count == 0)
 756                                goto finished;
 757                        buf++;
 758                        addr++;
 759                        count--;
 760                }
 761                n = vaddr + tmp->size - PAGE_SIZE - addr;
 762                do {
 763                        if (count == 0)
 764                                goto finished;
 765                        *addr = *buf;
 766                        buf++;
 767                        addr++;
 768                        count--;
 769                } while (--n > 0);
 770        }
 771finished:
 772        read_unlock(&vmlist_lock);
 773        return buf - buf_start;
 774}
 775
 776/**
 777 *      remap_vmalloc_range  -  map vmalloc pages to userspace
 778 *      @vma:           vma to cover (map full range of vma)
 779 *      @addr:          vmalloc memory
 780 *      @pgoff:         number of pages into addr before first page to map
 781 *
 782 *      Returns:        0 for success, -Exxx on failure
 783 *
 784 *      This function checks that addr is a valid vmalloc'ed area, and
 785 *      that it is big enough to cover the vma. Will return failure if
  786 *      those criteria aren't met.
 787 *
 788 *      Similar to remap_pfn_range() (see mm/memory.c)
 789 */
 790int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 791                                                unsigned long pgoff)
 792{
 793        struct vm_struct *area;
 794        unsigned long uaddr = vma->vm_start;
 795        unsigned long usize = vma->vm_end - vma->vm_start;
 796        int ret;
 797
 798        if ((PAGE_SIZE-1) & (unsigned long)addr)
 799                return -EINVAL;
 800
 801        read_lock(&vmlist_lock);
 802        area = __find_vm_area(addr);
 803        if (!area)
 804                goto out_einval_locked;
 805
 806        if (!(area->flags & VM_USERMAP))
 807                goto out_einval_locked;
 808
 809        if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
 810                goto out_einval_locked;
 811        read_unlock(&vmlist_lock);
 812
 813        addr += pgoff << PAGE_SHIFT;
 814        do {
 815                struct page *page = vmalloc_to_page(addr);
 816                ret = vm_insert_page(vma, uaddr, page);
 817                if (ret)
 818                        return ret;
 819
 820                uaddr += PAGE_SIZE;
 821                addr += PAGE_SIZE;
 822                usize -= PAGE_SIZE;
 823        } while (usize > 0);
 824
 825        /* Prevent "things" like memory migration? VM_flags need a cleanup... */
 826        vma->vm_flags |= VM_RESERVED;
 827
 828        return ret;
 829
 830out_einval_locked:
 831        read_unlock(&vmlist_lock);
 832        return -EINVAL;
 833}
 834EXPORT_SYMBOL(remap_vmalloc_range);
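 
/*
 * Usage sketch (illustrative): exporting a vmalloc_user() buffer from a
 * driver's ->mmap() handler.  demo_buf and demo_mmap() are hypothetical;
 * the buffer must carry VM_USERMAP (vmalloc_user()/vmalloc_32_user()) or
 * the checks above will reject it.
 */
#if 0
static void *demo_buf;          /* allocated elsewhere with vmalloc_user() */

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
        return remap_vmalloc_range(vma, demo_buf, vma->vm_pgoff);
}
#endif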
 835
 836/*
 837 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 838 * have one.
 839 */
 840void  __attribute__((weak)) vmalloc_sync_all(void)
 841{
 842}
 843
 844
 845static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
 846{
 847        /* apply_to_page_range() does all the hard work. */
 848        return 0;
 849}
 850
 851/**
 852 *      alloc_vm_area - allocate a range of kernel address space
 853 *      @size:          size of the area
 854 *
 855 *      Returns:        NULL on failure, vm_struct on success
 856 *
 857 *      This function reserves a range of kernel address space, and
 858 *      allocates pagetables to map that range.  No actual mappings
 859 *      are created.  If the kernel address space is not shared
 860 *      between processes, it syncs the pagetable across all
 861 *      processes.
 862 */
 863struct vm_struct *alloc_vm_area(size_t size)
 864{
 865        struct vm_struct *area;
 866
 867        area = get_vm_area_caller(size, VM_IOREMAP,
 868                                __builtin_return_address(0));
 869        if (area == NULL)
 870                return NULL;
 871
 872        /*
 873         * This ensures that page tables are constructed for this region
 874         * of kernel virtual address space and mapped into init_mm.
 875         */
 876        if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
 877                                area->size, f, NULL)) {
 878                free_vm_area(area);
 879                return NULL;
 880        }
 881
 882        /* Make sure the pagetables are constructed in process kernel
 883           mappings */
 884        vmalloc_sync_all();
 885
 886        return area;
 887}
 888EXPORT_SYMBOL_GPL(alloc_vm_area);
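 
/*
 * Usage sketch (illustrative): grabbing a page-table-backed range of
 * kernel address space into which mappings will be installed later, e.g.
 * by a hypervisor backend.  The demo_* names are hypothetical.
 */
#if 0
static struct vm_struct *demo_grab_area(void)
{
        struct vm_struct *area = alloc_vm_area(4 * PAGE_SIZE);

        if (!area)
                return NULL;
        /* page tables now exist for area->addr, but no pages are mapped */
        return area;
}

static void demo_release_area(struct vm_struct *area)
{
        free_vm_area(area);
}
#endif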
 889
 890void free_vm_area(struct vm_struct *area)
 891{
 892        struct vm_struct *ret;
 893        ret = remove_vm_area(area->addr);
 894        BUG_ON(ret != area);
 895        kfree(area);
 896}
 897EXPORT_SYMBOL_GPL(free_vm_area);
 898
 899
 900#ifdef CONFIG_PROC_FS
 901static void *s_start(struct seq_file *m, loff_t *pos)
 902{
 903        loff_t n = *pos;
 904        struct vm_struct *v;
 905
 906        read_lock(&vmlist_lock);
 907        v = vmlist;
 908        while (n > 0 && v) {
 909                n--;
 910                v = v->next;
 911        }
 912        if (!n)
 913                return v;
 914
 915        return NULL;
 916
 917}
 918
 919static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 920{
 921        struct vm_struct *v = p;
 922
 923        ++*pos;
 924        return v->next;
 925}
 926
 927static void s_stop(struct seq_file *m, void *p)
 928{
 929        read_unlock(&vmlist_lock);
 930}
 931
 932static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 933{
 934        if (NUMA_BUILD) {
 935                unsigned int nr, *counters = m->private;
 936
 937                if (!counters)
 938                        return;
 939
 940                memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 941
 942                for (nr = 0; nr < v->nr_pages; nr++)
 943                        counters[page_to_nid(v->pages[nr])]++;
 944
 945                for_each_node_state(nr, N_HIGH_MEMORY)
 946                        if (counters[nr])
 947                                seq_printf(m, " N%u=%u", nr, counters[nr]);
 948        }
 949}
 950
 951static int s_show(struct seq_file *m, void *p)
 952{
 953        struct vm_struct *v = p;
 954
 955        seq_printf(m, "0x%p-0x%p %7ld",
 956                v->addr, v->addr + v->size, v->size);
 957
 958        if (v->caller) {
 959                char buff[2 * KSYM_NAME_LEN];
 960
 961                seq_putc(m, ' ');
 962                sprint_symbol(buff, (unsigned long)v->caller);
 963                seq_puts(m, buff);
 964        }
 965
 966        if (v->nr_pages)
 967                seq_printf(m, " pages=%d", v->nr_pages);
 968
 969        if (v->phys_addr)
 970                seq_printf(m, " phys=%lx", v->phys_addr);
 971
 972        if (v->flags & VM_IOREMAP)
 973                seq_printf(m, " ioremap");
 974
 975        if (v->flags & VM_ALLOC)
 976                seq_printf(m, " vmalloc");
 977
 978        if (v->flags & VM_MAP)
 979                seq_printf(m, " vmap");
 980
 981        if (v->flags & VM_USERMAP)
 982                seq_printf(m, " user");
 983
 984        if (v->flags & VM_VPAGES)
 985                seq_printf(m, " vpages");
 986
 987        show_numa_info(m, v);
 988        seq_putc(m, '\n');
 989        return 0;
 990}
 991
 992const struct seq_operations vmalloc_op = {
 993        .start = s_start,
 994        .next = s_next,
 995        .stop = s_stop,
 996        .show = s_show,
 997};
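 
/*
 * Usage sketch (illustrative): vmalloc_op is meant to be wired to a
 * seq_file, roughly as below.  vmallocinfo_open() and vmallocinfo_fops are
 * hypothetical names; the in-tree proc code additionally sets up
 * m->private with per-node counters for show_numa_info().
 */
#if 0
static int vmallocinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &vmalloc_op);
}

static const struct file_operations vmallocinfo_fops = {
        .open           = vmallocinfo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
#endif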
 998#endif
 999
1000