linux-bk/mm/vmalloc.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/vmalloc.c
   3 *
   4 *  Copyright (C) 1993  Linus Torvalds
   5 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   6 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   7 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   8 */
   9
  10#include <linux/mm.h>
  11#include <linux/highmem.h>
  12#include <linux/slab.h>
  13#include <linux/spinlock.h>
  14#include <linux/interrupt.h>
  15
  16#include <linux/vmalloc.h>
  17
  18#include <asm/uaccess.h>
  19#include <asm/pgalloc.h>
  20#include <asm/tlbflush.h>
  21
  22
/*
 * vmlist_lock guards vmlist.  Within this file it is taken for writing
 * while an area is linked in or unlinked (get_vm_area(), remove_vm_area())
 * and for reading while the list is only walked (vread(), vwrite()).
 */
rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
/* Head of the singly linked (via ->next) list of reserved kernel VM areas. */
struct vm_struct *vmlist;
  25
  26static inline void unmap_area_pte(pmd_t *pmd, unsigned long address,
  27                                  unsigned long size)
  28{
  29        unsigned long end;
  30        pte_t *pte;
  31
  32        if (pmd_none(*pmd))
  33                return;
  34        if (pmd_bad(*pmd)) {
  35                pmd_ERROR(*pmd);
  36                pmd_clear(pmd);
  37                return;
  38        }
  39
  40        pte = pte_offset_kernel(pmd, address);
  41        address &= ~PMD_MASK;
  42        end = address + size;
  43        if (end > PMD_SIZE)
  44                end = PMD_SIZE;
  45
  46        do {
  47                pte_t page;
  48                page = ptep_get_and_clear(pte);
  49                address += PAGE_SIZE;
  50                pte++;
  51                if (pte_none(page))
  52                        continue;
  53                if (pte_present(page))
  54                        continue;
  55                printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
  56        } while (address < end);
  57}
  58
  59static inline void unmap_area_pmd(pgd_t *dir, unsigned long address,
  60                                  unsigned long size)
  61{
  62        unsigned long end;
  63        pmd_t *pmd;
  64
  65        if (pgd_none(*dir))
  66                return;
  67        if (pgd_bad(*dir)) {
  68                pgd_ERROR(*dir);
  69                pgd_clear(dir);
  70                return;
  71        }
  72
  73        pmd = pmd_offset(dir, address);
  74        address &= ~PGDIR_MASK;
  75        end = address + size;
  76        if (end > PGDIR_SIZE)
  77                end = PGDIR_SIZE;
  78
  79        do {
  80                unmap_area_pte(pmd, address, end - address);
  81                address = (address + PMD_SIZE) & PMD_MASK;
  82                pmd++;
  83        } while (address < end);
  84}
  85
  86static inline int map_area_pte(pte_t *pte, unsigned long address,
  87                               unsigned long size, pgprot_t prot,
  88                               struct page ***pages)
  89{
  90        unsigned long end;
  91
  92        address &= ~PMD_MASK;
  93        end = address + size;
  94        if (end > PMD_SIZE)
  95                end = PMD_SIZE;
  96
  97        do {
  98                struct page *page = **pages;
  99
 100                if (!pte_none(*pte))
 101                        printk(KERN_ERR "alloc_area_pte: page already exists\n");
 102                if (!page)
 103                        return -ENOMEM;
 104
 105                set_pte(pte, mk_pte(page, prot));
 106                address += PAGE_SIZE;
 107                pte++;
 108                (*pages)++;
 109        } while (address < end);
 110        return 0;
 111}
 112
 113static inline int map_area_pmd(pmd_t *pmd, unsigned long address,
 114                               unsigned long size, pgprot_t prot,
 115                               struct page ***pages)
 116{
 117        unsigned long end;
 118
 119        address &= ~PGDIR_MASK;
 120        end = address + size;
 121        if (end > PGDIR_SIZE)
 122                end = PGDIR_SIZE;
 123
 124        do {
 125                pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
 126                if (!pte)
 127                        return -ENOMEM;
 128                if (map_area_pte(pte, address, end - address, prot, pages))
 129                        return -ENOMEM;
 130                address = (address + PMD_SIZE) & PMD_MASK;
 131                pmd++;
 132        } while (address < end);
 133
 134        return 0;
 135}
 136
 137void unmap_vm_area(struct vm_struct *area)
 138{
 139        unsigned long address = VMALLOC_VMADDR(area->addr);
 140        unsigned long end = (address + area->size);
 141        pgd_t *dir;
 142
 143        dir = pgd_offset_k(address);
 144        flush_cache_all();
 145        do {
 146                unmap_area_pmd(dir, address, end - address);
 147                address = (address + PGDIR_SIZE) & PGDIR_MASK;
 148                dir++;
 149        } while (address && (address < end));
 150        flush_tlb_kernel_range(VMALLOC_VMADDR(area->addr), end);
 151}
 152
 153int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 154{
 155        unsigned long address = VMALLOC_VMADDR(area->addr);
 156        unsigned long end = address + (area->size-PAGE_SIZE);
 157        pgd_t *dir;
 158        int err = 0;
 159
 160        dir = pgd_offset_k(address);
 161        spin_lock(&init_mm.page_table_lock);
 162        do {
 163                pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 164                if (!pmd) {
 165                        err = -ENOMEM;
 166                        break;
 167                }
 168                if (map_area_pmd(pmd, address, end - address, prot, pages)) {
 169                        err = -ENOMEM;
 170                        break;
 171                }
 172
 173                address = (address + PGDIR_SIZE) & PGDIR_MASK;
 174                dir++;
 175        } while (address && (address < end));
 176
 177        spin_unlock(&init_mm.page_table_lock);
 178        flush_cache_all();
 179        return err;
 180}
 181
 182
 183/**
 184 *      get_vm_area  -  reserve a contingous kernel virtual area
 185 *
 186 *      @size:          size of the area
 187 *      @flags:         %VM_IOREMAP for I/O mappings or VM_ALLOC
 188 *
 189 *      Search an area of @size in the kernel virtual mapping area,
 190 *      and reserved it for out purposes.  Returns the area descriptor
 191 *      on success or %NULL on failure.
 192 */
 193struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 194{
 195        struct vm_struct **p, *tmp, *area;
 196        unsigned long addr = VMALLOC_START;
 197
 198        area = kmalloc(sizeof(*area), GFP_KERNEL);
 199        if (unlikely(!area))
 200                return NULL;
 201
 202        /*
 203         * We always allocate a guard page.
 204         */
 205        size += PAGE_SIZE;
 206
 207        write_lock(&vmlist_lock);
 208        for (p = &vmlist; (tmp = *p) ;p = &tmp->next) {
 209                if ((size + addr) < addr)
 210                        goto out;
 211                if (size + addr <= (unsigned long)tmp->addr)
 212                        goto found;
 213                addr = tmp->size + (unsigned long)tmp->addr;
 214                if (addr > VMALLOC_END-size)
 215                        goto out;
 216        }
 217
 218found:
 219        area->next = *p;
 220        *p = area;
 221
 222        area->flags = flags;
 223        area->addr = (void *)addr;
 224        area->size = size;
 225        area->pages = NULL;
 226        area->nr_pages = 0;
 227        area->phys_addr = 0;
 228        write_unlock(&vmlist_lock);
 229
 230        return area;
 231
 232out:
 233        write_unlock(&vmlist_lock);
 234        kfree(area);
 235        return NULL;
 236}
 237
 238/**
 239 *      remove_vm_area  -  find and remove a contingous kernel virtual area
 240 *
 241 *      @addr:          base address
 242 *
 243 *      Search for the kernel VM area starting at @addr, and remove it.
 244 *      This function returns the found VM area, but using it is NOT safe
 245 *      on SMP machines.
 246 */
 247struct vm_struct *remove_vm_area(void *addr)
 248{
 249        struct vm_struct **p, *tmp;
 250
 251        write_lock(&vmlist_lock);
 252        for (p = &vmlist ; (tmp = *p) ;p = &tmp->next) {
 253                 if (tmp->addr == addr)
 254                         goto found;
 255        }
 256        write_unlock(&vmlist_lock);
 257        return NULL;
 258
 259found:
 260        *p = tmp->next;
 261        write_unlock(&vmlist_lock);
 262        return tmp;
 263}
 264
 265void __vunmap(void *addr, int deallocate_pages)
 266{
 267        struct vm_struct *area;
 268
 269        if (!addr)
 270                return;
 271
 272        if ((PAGE_SIZE-1) & (unsigned long)addr) {
 273                printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 274                return;
 275        }
 276
 277        area = remove_vm_area(addr);
 278        if (unlikely(!area)) {
 279                printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 280                                addr);
 281                return;
 282        }
 283
 284        unmap_vm_area(area);
 285        
 286        if (deallocate_pages) {
 287                int i;
 288
 289                for (i = 0; i < area->nr_pages; i++) {
 290                        if (unlikely(!area->pages[i]))
 291                                BUG();
 292                        __free_page(area->pages[i]);
 293                }
 294
 295                kfree(area->pages);
 296        }
 297
 298        kfree(area);
 299        return;
 300}
 301
 302/**
 303 *      vfree  -  release memory allocated by vmalloc()
 304 *
 305 *      @addr:          memory base address
 306 *
 307 *      Free the virtually continguos memory area starting at @addr, as
 308 *      obtained from vmalloc(), vmalloc_32() or __vmalloc().
 309 *
 310 *      May not be called in interrupt context.
 311 */
 312void vfree(void *addr)
 313{
 314        BUG_ON(in_interrupt());
 315        __vunmap(addr, 1);
 316}
 317
 318/**
 319 *      vunmap  -  release virtual mapping obtained by vmap()
 320 *
 321 *      @addr:          memory base address
 322 *
 323 *      Free the virtually continguos memory area starting at @addr,
 324 *      which was created from the page array passed to vmap().
 325 *
 326 *      May not be called in interrupt context.
 327 */
 328void vunmap(void *addr)
 329{
 330        BUG_ON(in_interrupt());
 331        __vunmap(addr, 0);
 332}
 333
 334/**
 335 *      vmap  -  map an array of pages into virtually continguos space
 336 *
 337 *      @pages:         array of page pointers
 338 *      @count:         number of pages to map
 339 *
 340 *      Maps @count pages from @pages into continguos kernel virtual
 341 *      space.
 342 */
 343void *vmap(struct page **pages, unsigned int count)
 344{
 345        struct vm_struct *area;
 346
 347        if (count > num_physpages)
 348                return NULL;
 349
 350        area = get_vm_area((count << PAGE_SHIFT), VM_MAP);
 351        if (!area)
 352                return NULL;
 353        if (map_vm_area(area, PAGE_KERNEL, &pages)) {
 354                vunmap(area->addr);
 355                return NULL;
 356        }
 357
 358        return area->addr;
 359}
 360
 361/**
 362 *      __vmalloc  -  allocate virtually continguos memory
 363 *
 364 *      @size:          allocation size
 365 *      @gfp_mask:      flags for the page level allocator
 366 *      @prot:          protection mask for the allocated pages
 367 *
 368 *      Allocate enough pages to cover @size from the page level
 369 *      allocator with @gfp_mask flags.  Map them into continguos
 370 *      kernel virtual space, using a pagetable protection of @prot.
 371 */
 372void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
 373{
 374        struct vm_struct *area;
 375        struct page **pages;
 376        unsigned int nr_pages, array_size, i;
 377
 378        size = PAGE_ALIGN(size);
 379        if (!size || (size >> PAGE_SHIFT) > num_physpages)
 380                return NULL;
 381
 382        area = get_vm_area(size, VM_ALLOC);
 383        if (!area)
 384                return NULL;
 385
 386        nr_pages = (size+PAGE_SIZE) >> PAGE_SHIFT;
 387        array_size = (nr_pages * sizeof(struct page *));
 388
 389        area->nr_pages = nr_pages;
 390        area->pages = pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
 391        if (!area->pages) {
 392                remove_vm_area(area->addr);
 393                kfree(area);
 394                return NULL;
 395        }
 396        memset(area->pages, 0, array_size);
 397
 398        for (i = 0; i < area->nr_pages; i++) {
 399                area->pages[i] = alloc_page(gfp_mask);
 400                if (unlikely(!area->pages[i])) {
 401                        /* Successfully allocated i pages, free them in __vunmap() */
 402                        area->nr_pages = i;
 403                        goto fail;
 404                }
 405        }
 406        
 407        if (map_vm_area(area, prot, &pages))
 408                goto fail;
 409        return area->addr;
 410
 411fail:
 412        vfree(area->addr);
 413        return NULL;
 414}
 415
 416/**
 417 *      vmalloc  -  allocate virtually continguos memory
 418 *
 419 *      @size:          allocation size
 420 *
 421 *      Allocate enough pages to cover @size from the page level
 422 *      allocator and map them into continguos kernel virtual space.
 423 *
 424 *      For tight cotrol over page level allocator and protection flags
 425 *      use __vmalloc() instead.
 426 */
 427void *vmalloc(unsigned long size)
 428{
 429       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 430}
 431
 432/**
 433 *      vmalloc_32  -  allocate virtually continguos memory (32bit addressable)
 434 *
 435 *      @size:          allocation size
 436 *
 437 *      Allocate enough 32bit PA addressable pages to cover @size from the
 438 *      page level allocator and map them into continguos kernel virtual space.
 439 */
 440void *vmalloc_32(unsigned long size)
 441{
 442        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 443}
 444
 445long vread(char *buf, char *addr, unsigned long count)
 446{
 447        struct vm_struct *tmp;
 448        char *vaddr, *buf_start = buf;
 449        unsigned long n;
 450
 451        /* Don't allow overflow */
 452        if ((unsigned long) addr + count < count)
 453                count = -(unsigned long) addr;
 454
 455        read_lock(&vmlist_lock);
 456        for (tmp = vmlist; tmp; tmp = tmp->next) {
 457                vaddr = (char *) tmp->addr;
 458                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 459                        continue;
 460                while (addr < vaddr) {
 461                        if (count == 0)
 462                                goto finished;
 463                        *buf = '\0';
 464                        buf++;
 465                        addr++;
 466                        count--;
 467                }
 468                n = vaddr + tmp->size - PAGE_SIZE - addr;
 469                do {
 470                        if (count == 0)
 471                                goto finished;
 472                        *buf = *addr;
 473                        buf++;
 474                        addr++;
 475                        count--;
 476                } while (--n > 0);
 477        }
 478finished:
 479        read_unlock(&vmlist_lock);
 480        return buf - buf_start;
 481}
 482
 483long vwrite(char *buf, char *addr, unsigned long count)
 484{
 485        struct vm_struct *tmp;
 486        char *vaddr, *buf_start = buf;
 487        unsigned long n;
 488
 489        /* Don't allow overflow */
 490        if ((unsigned long) addr + count < count)
 491                count = -(unsigned long) addr;
 492
 493        read_lock(&vmlist_lock);
 494        for (tmp = vmlist; tmp; tmp = tmp->next) {
 495                vaddr = (char *) tmp->addr;
 496                if (addr >= vaddr + tmp->size - PAGE_SIZE)
 497                        continue;
 498                while (addr < vaddr) {
 499                        if (count == 0)
 500                                goto finished;
 501                        buf++;
 502                        addr++;
 503                        count--;
 504                }
 505                n = vaddr + tmp->size - PAGE_SIZE - addr;
 506                do {
 507                        if (count == 0)
 508                                goto finished;
 509                        *addr = *buf;
 510                        buf++;
 511                        addr++;
 512                        count--;
 513                } while (--n > 0);
 514        }
 515finished:
 516        read_unlock(&vmlist_lock);
 517        return buf - buf_start;
 518}
 519
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.