linux-bk/mm/vmalloc.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/vmalloc.c
   3 *
   4 *  Copyright (C) 1993  Linus Torvalds
   5 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   6 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   7 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   8 */
   9
  10#include <linux/mm.h>
  11#include <linux/module.h>
  12#include <linux/highmem.h>
  13#include <linux/slab.h>
  14#include <linux/spinlock.h>
  15#include <linux/interrupt.h>
  16
  17#include <linux/vmalloc.h>
  18
  19#include <asm/uaccess.h>
  20#include <asm/tlbflush.h>
  21
  22
/*
 * vmlist is the singly linked list (via ->next) of kernel virtual areas.
 * __get_vm_area() inserts new areas in address order and remove_vm_area()
 * unlinks them, both under vmlist_lock held for writing; vread() and
 * vwrite() walk the list with the lock held for reading.
 */
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
  25
  26static void unmap_area_pte(pmd_t *pmd, unsigned long address,
  27                                  unsigned long size)
  28{
  29        unsigned long end;
  30        pte_t *pte;
  31
  32        if (pmd_none(*pmd))
  33                return;
  34        if (pmd_bad(*pmd)) {
  35                pmd_ERROR(*pmd);
  36                pmd_clear(pmd);
  37                return;
  38        }
  39
  40        pte = pte_offset_kernel(pmd, address);
  41        address &= ~PMD_MASK;
  42        end = address + size;
  43        if (end > PMD_SIZE)
  44                end = PMD_SIZE;
  45
  46        do {
  47                pte_t page;
  48                page = ptep_get_and_clear(pte);
  49                address += PAGE_SIZE;
  50                pte++;
  51                if (pte_none(page))
  52                        continue;
  53                if (pte_present(page))
  54                        continue;
  55                printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
  56        } while (address < end);
  57}
  58
  59static void unmap_area_pmd(pud_t *pud, unsigned long address,
  60                                  unsigned long size)
  61{
  62        unsigned long end;
  63        pmd_t *pmd;
  64
  65        if (pud_none(*pud))
  66                return;
  67        if (pud_bad(*pud)) {
  68                pud_ERROR(*pud);
  69                pud_clear(pud);
  70                return;
  71        }
  72
  73        pmd = pmd_offset(pud, address);
  74        address &= ~PUD_MASK;
  75        end = address + size;
  76        if (end > PUD_SIZE)
  77                end = PUD_SIZE;
  78
  79        do {
  80                unmap_area_pte(pmd, address, end - address);
  81                address = (address + PMD_SIZE) & PMD_MASK;
  82                pmd++;
  83        } while (address < end);
  84}
  85
  86static void unmap_area_pud(pgd_t *pgd, unsigned long address,
  87                           unsigned long size)
  88{
  89        pud_t *pud;
  90        unsigned long end;
  91
  92        if (pgd_none(*pgd))
  93                return;
  94        if (pgd_bad(*pgd)) {
  95                pgd_ERROR(*pgd);
  96                pgd_clear(pgd);
  97                return;
  98        }
  99
 100        pud = pud_offset(pgd, address);
 101        address &= ~PGDIR_MASK;
 102        end = address + size;
 103        if (end > PGDIR_SIZE)
 104                end = PGDIR_SIZE;
 105
 106        do {
 107                unmap_area_pmd(pud, address, end - address);
 108                address = (address + PUD_SIZE) & PUD_MASK;
 109                pud++;
 110        } while (address && (address < end));
 111}
 112
 113static int map_area_pte(pte_t *pte, unsigned long address,
 114                               unsigned long size, pgprot_t prot,
 115                               struct page ***pages)
 116{
 117        unsigned long end;
 118
 119        address &= ~PMD_MASK;
 120        end = address + size;
 121        if (end > PMD_SIZE)
 122                end = PMD_SIZE;
 123
 124        do {
 125                struct page *page = **pages;
 126                WARN_ON(!pte_none(*pte));
 127                if (!page)
 128                        return -ENOMEM;
 129
 130                set_pte(pte, mk_pte(page, prot));
 131                address += PAGE_SIZE;
 132                pte++;
 133                (*pages)++;
 134        } while (address < end);
 135        return 0;
 136}
 137
 138static int map_area_pmd(pmd_t *pmd, unsigned long address,
 139                               unsigned long size, pgprot_t prot,
 140                               struct page ***pages)
 141{
 142        unsigned long base, end;
 143
 144        base = address & PUD_MASK;
 145        address &= ~PUD_MASK;
 146        end = address + size;
 147        if (end > PUD_SIZE)
 148                end = PUD_SIZE;
 149
 150        do {
 151                pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
 152                if (!pte)
 153                        return -ENOMEM;
 154                if (map_area_pte(pte, address, end - address, prot, pages))
 155                        return -ENOMEM;
 156                address = (address + PMD_SIZE) & PMD_MASK;
 157                pmd++;
 158        } while (address < end);
 159
 160        return 0;
 161}
 162
 163static int map_area_pud(pud_t *pud, unsigned long address,
 164                               unsigned long end, pgprot_t prot,
 165                               struct page ***pages)
 166{
 167        do {
 168                pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
 169                if (!pmd)
 170                        return -ENOMEM;
 171                if (map_area_pmd(pmd, address, end - address, prot, pages))
 172                        return -ENOMEM;
 173                address = (address + PUD_SIZE) & PUD_MASK;
 174                pud++;
 175        } while (address && address < end);
 176
 177        return 0;
 178}
 179
 180void unmap_vm_area(struct vm_struct *area)
 181{
 182        unsigned long address = (unsigned long) area->addr;
 183        unsigned long end = (address + area->size);
 184        unsigned long next;
 185        pgd_t *pgd;
 186        int i;
 187
 188        pgd = pgd_offset_k(address);
 189        flush_cache_vunmap(address, end);
 190        for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
 191                next = (address + PGDIR_SIZE) & PGDIR_MASK;
 192                if (next <= address || next > end)
 193                        next = end;
 194                unmap_area_pud(pgd, address, next - address);
 195                address = next;
 196                pgd++;
 197        }
 198        flush_tlb_kernel_range((unsigned long) area->addr, end);
 199}
 200
 201int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 202{
 203        unsigned long address = (unsigned long) area->addr;
 204        unsigned long end = address + (area->size-PAGE_SIZE);
 205        unsigned long next;
 206        pgd_t *pgd;
 207        int err = 0;
 208        int i;
 209
 210        pgd = pgd_offset_k(address);
 211        spin_lock(&init_mm.page_table_lock);
 212        for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
 213                pud_t *pud = pud_alloc(&init_mm, pgd, address);
 214                if (!pud) {
 215                        err = -ENOMEM;
 216                        break;
 217                }
 218                next = (address + PGDIR_SIZE) & PGDIR_MASK;
 219                if (next < address || next > end)
 220                        next = end;
 221                if (map_area_pud(pud, address, next, prot, pages)) {
 222                        err = -ENOMEM;
 223                        break;
 224                }
 225
 226                address = next;
 227                pgd++;
 228        }
 229
 230        spin_unlock(&init_mm.page_table_lock);
 231        flush_cache_vmap((unsigned long) area->addr, end);
 232        return err;
 233}
 234
 235#define IOREMAP_MAX_ORDER       (7 + PAGE_SHIFT)        /* 128 pages */
 236
 237struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 238                                unsigned long start, unsigned long end)
 239{
 240        struct vm_struct **p, *tmp, *area;
 241        unsigned long align = 1;
 242        unsigned long addr;
 243
 244        if (flags & VM_IOREMAP) {
 245                int bit = fls(size);
 246
 247                if (bit > IOREMAP_MAX_ORDER)
 248                        bit = IOREMAP_MAX_ORDER;
 249                else if (bit < PAGE_SHIFT)
 250                        bit = PAGE_SHIFT;
 251
 252                align = 1ul << bit;
 253        }
 254        addr = ALIGN(start, align);
 255
 256        area = kmalloc(sizeof(*area), GFP_KERNEL);
 257        if (unlikely(!area))
 258                return NULL;
 259
 260        /*
 261         * We always allocate a guard page.
 262         */
 263        size += PAGE_SIZE;
 264        if (unlikely(!size)) {
 265                kfree (area);
 266                return NULL;
 267        }
 268
 269        write_lock(&vmlist_lock);
 270        for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
 271                if ((unsigned long)tmp->addr < addr) {
 272                        if((unsigned long)tmp->addr + tmp->size >= addr)
 273                                addr = ALIGN(tmp->size + 
 274                                             (unsigned long)tmp->addr, align);
 275                        continue;
 276                }
 277                if ((size + addr) < addr)
 278                        goto out;
 279                if (size + addr <= (unsigned long)tmp->addr)
 280                        goto found;
 281                addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
 282                if (addr > end - size)
 283                        goto out;
 284        }
 285
 286found:
 287        area->next = *p;
 288        *p = area;
 289
 290        area->flags = flags;
 291        area->addr = (void *)addr;
 292        area->size = size;
 293        area->pages = NULL;
 294        area->nr_pages = 0;
 295        area->phys_addr = 0;
 296        write_unlock(&vmlist_lock);
 297
 298        return area;
 299
 300out:
 301        write_unlock(&vmlist_lock);
 302        kfree(area);
 303        if (printk_ratelimit())
 304                printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
 305        return NULL;
 306}
 307
 308/**
 309 *      get_vm_area  -  reserve a contingous kernel virtual area
 310 *
 311 *      @size:          size of the area
 312 *      @flags:         %VM_IOREMAP for I/O mappings or VM_ALLOC
 313 *
 314 *      Search an area of @size in the kernel virtual mapping area,
 315 *      and reserved it for out purposes.  Returns the area descriptor
 316 *      on success or %NULL on failure.
 317 */
 318struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 319{
 320        return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 321}
 322
 323/**
 324 *      remove_vm_area  -  find and remove a contingous kernel virtual area
 325 *
 326 *      @addr:          base address
 327 *
 328 *      Search for the kernel VM area starting at @addr, and remove it.
 329 *      This function returns the found VM area, but using it is NOT safe
 330 *      on SMP machines.
 331 */
 332struct vm_struct *remove_vm_area(void *addr)
 333{
 334        struct vm_struct **p, *tmp;
 335
 336        write_lock(&vmlist_lock);
 337        for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
 338                 if (tmp->addr == addr)
 339                         goto found;
 340        }
 341        write_unlock(&vmlist_lock);
 342        return NULL;
 343
 344found:
 345        unmap_vm_area(tmp);
 346        *p = tmp->next;
 347        write_unlock(&vmlist_lock);
 348        return tmp;
 349}
 350
 351void __vunmap(void *addr, int deallocate_pages)
 352{
 353        struct vm_struct *area;
 354
 355        if (!addr)
 356                return;
 357
 358        if ((PAGE_SIZE-1) & (unsigned long)addr) {
 359                printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 360                WARN_ON(1);
 361                return;
 362        }
 363
 364        area = remove_vm_area(addr);
 365        if (unlikely(!area)) {
 366                printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 367                                addr);
 368                WARN_ON(1);
 369                return;
 370        }
 371        
 372        if (deallocate_pages) {
 373                int i;
 374
 375                for (i = 0; i < area->nr_pages; i++) {
 376                        if (unlikely(!area->pages[i]))
 377                                BUG();
 378                        __free_page(area->pages[i]);
 379                }
 380
 381                if (area->nr_pages > PAGE_SIZE/sizeof(struct page *))
 382                        vfree(area->pages);
 383                else
 384                        kfree(area->pages);
 385        }
 386
 387        kfree(area);
 388        return;
 389}
 390
 391/**
 392 *      vfree  -  release memory allocated by vmalloc()
 393 *
 394 *      @addr:          memory base address
 395 *
 396 *      Free the virtually contiguous memory area starting at @addr, as
 397 *      obtained from vmalloc(), vmalloc_32() or __vmalloc().
 398 *
 399 *      May not be called in interrupt context.
 400 */
 401void vfree(void *addr)
 402{
 403        BUG_ON(in_interrupt());
 404        __vunmap(addr, 1);
 405}
 406
 407EXPORT_SYMBOL(vfree);
 408
 409/**
 410 *      vunmap  -  release virtual mapping obtained by vmap()
 411 *
 412 *      @addr:          memory base address
 413 *
 414 *      Free the virtually contiguous memory area starting at @addr,
 415 *      which was created from the page array passed to vmap().
 416 *
 417 *      May not be called in interrupt context.
 418 */
 419void vunmap(void *addr)
 420{
 421        BUG_ON(in_interrupt());
 422        __vunmap(addr, 0);
 423}
 424
 425EXPORT_SYMBOL(vunmap);
 426
 427/**
 428 *      vmap  -  map an array of pages into virtually contiguous space
 429 *
 430 *      @pages:         array of page pointers
 431 *      @count:         number of pages to map
 432 *      @flags:         vm_area->flags
 433 *      @prot:          page protection for the mapping
 434 *
 435 *      Maps @count pages from @pages into contiguous kernel virtual
 436 *      space.
 437 */
 438void *vmap(struct page **pages, unsigned int count,
 439                unsigned long flags, pgprot_t prot)
 440{
 441        struct vm_struct *area;
 442
 443        if (count > num_physpages)
 444                return NULL;
 445
 446        area = get_vm_area((count << PAGE_SHIFT), flags);
 447        if (!area)
 448                return NULL;
 449        if (map_vm_area(area, prot, &pages)) {
 450                vunmap(area->addr);
 451                return NULL;
 452        }
 453
 454        return area->addr;
 455}
 456
 457EXPORT_SYMBOL(vmap);
 458
 459/**
 460 *      __vmalloc  -  allocate virtually contiguous memory
 461 *
 462 *      @size:          allocation size
 463 *      @gfp_mask:      flags for the page level allocator
 464 *      @prot:          protection mask for the allocated pages
 465 *
 466 *      Allocate enough pages to cover @size from the page level
 467 *      allocator with @gfp_mask flags.  Map them into contiguous
 468 *      kernel virtual space, using a pagetable protection of @prot.
 469 */
 470void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
 471{
 472        struct vm_struct *area;
 473        struct page **pages;
 474        unsigned int nr_pages, array_size, i;
 475
 476        size = PAGE_ALIGN(size);
 477        if (!size || (size >> PAGE_SHIFT) > num_physpages)
 478                return NULL;
 479
 480        area = get_vm_area(size, VM_ALLOC);
 481        if (!area)
 482                return NULL;
 483
 484        nr_pages = size >> PAGE_SHIFT;
 485        array_size = (nr_pages * sizeof(struct page *));
 486
 487        area->nr_pages = nr_pages;
 488        /* Please note that the recursion is strictly bounded. */
 489        if (array_size > PAGE_SIZE)
 490                pages = __vmalloc(array_size, gfp_mask, PAGE_KERNEL);
 491        else
 492                pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
 493        area->pages = pages;
 494        if (!area->pages) {
 495                remove_vm_area(area->addr);
 496                kfree(area);
 497                return NULL;
 498        }
 499        memset(area->pages, 0, array_size);
 500
 501        for (i = 0; i < area->nr_pages; i++) {
 502                area->pages[i] = alloc_page(gfp_mask);
 503                if (unlikely(!area->pages[i])) {
 504                        /* Successfully allocated i pages, free them in __vunmap() */
 505                        area->nr_pages = i;
 506                        goto fail;
 507                }
 508        }
 509        
 510        if (map_vm_area(area, prot, &pages))
 511                goto fail;
 512        return area->addr;
 513
 514fail:
 515        vfree(area->addr);
 516        return NULL;
 517}
 518
 519EXPORT_SYMBOL(__vmalloc);
 520
 521/**
 522 *      vmalloc  -  allocate virtually contiguous memory
 523 *
 524 *      @size:          allocation size
 525 *
 526 *      Allocate enough pages to cover @size from the page level
 527 *      allocator and map them into contiguous kernel virtual space.
 528 *
 529 *      For tight cotrol over page level allocator and protection flags
 530 *      use __vmalloc() instead.
 531 */
 532void *vmalloc(unsigned long size)
 533{
 534       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 535}
 536
 537EXPORT_SYMBOL(vmalloc);
 538
 539/**
 540 *      vmalloc_exec  -  allocate virtually contiguous, executable memory
 541 *
 542 *      @size:          allocation size
 543 *
 544 *      Kernel-internal function to allocate enough pages to cover @size
 545 *      the page level allocator and map them into contiguous and
 546 *      executable kernel virtual space.
 547 *
 548 *      For tight cotrol over page level allocator and protection flags
 549 *      use __vmalloc() instead.
 550 */
 551
 552#ifndef PAGE_KERNEL_EXEC
 553# define PAGE_KERNEL_EXEC PAGE_KERNEL
 554#endif
 555
 556void *vmalloc_exec(unsigned long size)
 557{
 558        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
 559}
 560
 561/**
 562 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 563 *
 564 *      @size:          allocation size
 565 *
 566 *      Allocate enough 32bit PA addressable pages to cover @size from the
 567 *      page level allocator and map them into contiguous kernel virtual space.
 568 */
 569void *vmalloc_32(unsigned long size)
 570{
 571        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 572}
 573
 574EXPORT_SYMBOL(vmalloc_32);
 575
 576long vread(char *buf, char *addr, unsigned long count)
 577{
 578        struct vm_struct *tmp;
 579        char *vaddr, *buf_start = buf;
 580        unsigned long n;
 581
 582        /* Don't allow overflow */
 583        if ((unsigned long) addr + count < count)
 584                count = -(unsigned long) addr;
 585
 586        read_lock(&vmlist_lock);
 587        for (tmp = vmlist; tmp; tmp = tmp->next) {
 588                vaddr = (char *) tmp->addr;
 589                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 590                        continue;
 591                while (addr < vaddr) {
 592                        if (count == 0)
 593                                goto finished;
 594                        *buf = '\0';
 595                        buf++;
 596                        addr++;
 597                        count--;
 598                }
 599                n = vaddr + tmp->size - PAGE_SIZE - addr;
 600                do {
 601                        if (count == 0)
 602                                goto finished;
 603                        *buf = *addr;
 604                        buf++;
 605                        addr++;
 606                        count--;
 607                } while (--n > 0);
 608        }
 609finished:
 610        read_unlock(&vmlist_lock);
 611        return buf - buf_start;
 612}
 613
 614long vwrite(char *buf, char *addr, unsigned long count)
 615{
 616        struct vm_struct *tmp;
 617        char *vaddr, *buf_start = buf;
 618        unsigned long n;
 619
 620        /* Don't allow overflow */
 621        if ((unsigned long) addr + count < count)
 622                count = -(unsigned long) addr;
 623
 624        read_lock(&vmlist_lock);
 625        for (tmp = vmlist; tmp; tmp = tmp->next) {
 626                vaddr = (char *) tmp->addr;
 627                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 628                        continue;
 629                while (addr < vaddr) {
 630                        if (count == 0)
 631                                goto finished;
 632                        buf++;
 633                        addr++;
 634                        count--;
 635                }
 636                n = vaddr + tmp->size - PAGE_SIZE - addr;
 637                do {
 638                        if (count == 0)
 639                                goto finished;
 640                        *addr = *buf;
 641                        buf++;
 642                        addr++;
 643                        count--;
 644                } while (--n > 0);
 645        }
 646finished:
 647        read_unlock(&vmlist_lock);
 648        return buf - buf_start;
 649}
 650
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.