linux/mm/nommu.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/nommu.c
   3 *
   4 *  Replacement code for mm functions to support CPU's that don't
   5 *  have any form of memory management unit (thus no virtual memory).
   6 *
   7 *  See Documentation/nommu-mmap.txt
   8 *
   9 *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
  10 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  11 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  12 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
  13 */
  14
  15#include <linux/mm.h>
  16#include <linux/mman.h>
  17#include <linux/swap.h>
  18#include <linux/file.h>
  19#include <linux/highmem.h>
  20#include <linux/pagemap.h>
  21#include <linux/slab.h>
  22#include <linux/vmalloc.h>
  23#include <linux/ptrace.h>
  24#include <linux/blkdev.h>
  25#include <linux/backing-dev.h>
  26#include <linux/mount.h>
  27#include <linux/personality.h>
  28#include <linux/security.h>
  29#include <linux/syscalls.h>
  30
  31#include <asm/uaccess.h>
  32#include <asm/tlb.h>
  33#include <asm/tlbflush.h>
  34
  35void *high_memory;
  36struct page *mem_map;
  37unsigned long max_mapnr;
  38unsigned long num_physpages;
  39unsigned long askedalloc, realalloc;
  40atomic_t vm_committed_space = ATOMIC_INIT(0);
  41int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
  42int sysctl_overcommit_ratio = 50; /* default is 50% */
  43int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
  44int heap_stack_gap = 0;
  45
  46EXPORT_SYMBOL(mem_map);
  47EXPORT_SYMBOL(__vm_enough_memory);
  48EXPORT_SYMBOL(num_physpages);
  49
  50/* list of shareable VMAs */
  51struct rb_root nommu_vma_tree = RB_ROOT;
  52DECLARE_RWSEM(nommu_vma_sem);
  53
  54struct vm_operations_struct generic_file_vm_ops = {
  55};
  56
  57EXPORT_SYMBOL(vfree);
  58EXPORT_SYMBOL(vmalloc_to_page);
  59EXPORT_SYMBOL(vmalloc_32);
  60EXPORT_SYMBOL(vmap);
  61EXPORT_SYMBOL(vunmap);
  62
  63/*
  64 * Handle all mappings that got truncated by a "truncate()"
  65 * system call.
  66 *
  67 * NOTE! We have to be ready to update the memory sharing
  68 * between the file and the memory map for a potential last
  69 * incomplete page.  Ugly, but necessary.
  70 */
  71int vmtruncate(struct inode *inode, loff_t offset)
  72{
  73        struct address_space *mapping = inode->i_mapping;
  74        unsigned long limit;
  75
  76        if (inode->i_size < offset)
  77                goto do_expand;
  78        i_size_write(inode, offset);
  79
  80        truncate_inode_pages(mapping, offset);
  81        goto out_truncate;
  82
  83do_expand:
  84        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
  85        if (limit != RLIM_INFINITY && offset > limit)
  86                goto out_sig;
  87        if (offset > inode->i_sb->s_maxbytes)
  88                goto out;
  89        i_size_write(inode, offset);
  90
  91out_truncate:
  92        if (inode->i_op && inode->i_op->truncate)
  93                inode->i_op->truncate(inode);
  94        return 0;
  95out_sig:
  96        send_sig(SIGXFSZ, current, 0);
  97out:
  98        return -EFBIG;
  99}
 100
 101EXPORT_SYMBOL(vmtruncate);
 102
 103/*
 104 * Return the total memory allocated for this pointer, not
 105 * just what the caller asked for.
 106 *
 107 * Doesn't have to be accurate, i.e. may have races.
 108 */
 109unsigned int kobjsize(const void *objp)
 110{
 111        struct page *page;
 112
 113        if (!objp || !((page = virt_to_page(objp))))
 114                return 0;
 115
 116        if (PageSlab(page))
 117                return ksize(objp);
 118
 119        BUG_ON(page->index < 0);
 120        BUG_ON(page->index >= MAX_ORDER);
 121
 122        return (PAGE_SIZE << page->index);
 123}
 124
 125/*
 126 * get a list of pages in an address range belonging to the specified process
 127 * and indicate the VMA that covers each page
 128 * - this is potentially dodgy as we may end incrementing the page count of a
 129 *   slab page or a secondary page from a compound page
 130 * - don't permit access to VMAs that don't support it, such as I/O mappings
 131 */
 132int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 133        unsigned long start, int len, int write, int force,
 134        struct page **pages, struct vm_area_struct **vmas)
 135{
 136        struct vm_area_struct *vma;
 137        unsigned long vm_flags;
 138        int i;
 139
 140        /* calculate required read or write permissions.
 141         * - if 'force' is set, we only require the "MAY" flags.
 142         */
 143        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
 144        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 145
 146        for (i = 0; i < len; i++) {
 147                vma = find_vma(mm, start);
 148                if (!vma)
 149                        goto finish_or_fault;
 150
 151                /* protect what we can, including chardevs */
 152                if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
 153                    !(vm_flags & vma->vm_flags))
 154                        goto finish_or_fault;
 155
 156                if (pages) {
 157                        pages[i] = virt_to_page(start);
 158                        if (pages[i])
 159                                page_cache_get(pages[i]);
 160                }
 161                if (vmas)
 162                        vmas[i] = vma;
 163                start += PAGE_SIZE;
 164        }
 165
 166        return i;
 167
 168finish_or_fault:
 169        return i ? : -EFAULT;
 170}
 171
 172EXPORT_SYMBOL(get_user_pages);
 173
 174DEFINE_RWLOCK(vmlist_lock);
 175struct vm_struct *vmlist;
 176
 177void vfree(void *addr)
 178{
 179        kfree(addr);
 180}
 181
 182void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 183{
 184        /*
 185         * kmalloc doesn't like __GFP_HIGHMEM for some reason
 186         */
 187        return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
 188}
 189
 190struct page * vmalloc_to_page(void *addr)
 191{
 192        return virt_to_page(addr);
 193}
 194
 195unsigned long vmalloc_to_pfn(void *addr)
 196{
 197        return page_to_pfn(virt_to_page(addr));
 198}
 199
 200
 201long vread(char *buf, char *addr, unsigned long count)
 202{
 203        memcpy(buf, addr, count);
 204        return count;
 205}
 206
 207long vwrite(char *buf, char *addr, unsigned long count)
 208{
 209        /* Don't allow overflow */
 210        if ((unsigned long) addr + count < count)
 211                count = -(unsigned long) addr;
 212
 213        memcpy(addr, buf, count);
 214        return(count);
 215}
 216
 217/*
 218 *      vmalloc  -  allocate virtually continguos memory
 219 *
 220 *      @size:          allocation size
 221 *
 222 *      Allocate enough pages to cover @size from the page level
 223 *      allocator and map them into continguos kernel virtual space.
 224 *
 225 *      For tight control over page level allocator and protection flags
 226 *      use __vmalloc() instead.
 227 */
 228void *vmalloc(unsigned long size)
 229{
 230       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 231}
 232EXPORT_SYMBOL(vmalloc);
 233
 234void *vmalloc_node(unsigned long size, int node)
 235{
 236        return vmalloc(size);
 237}
 238EXPORT_SYMBOL(vmalloc_node);
 239
 240/*
 241 *      vmalloc_32  -  allocate virtually continguos memory (32bit addressable)
 242 *
 243 *      @size:          allocation size
 244 *
 245 *      Allocate enough 32bit PA addressable pages to cover @size from the
 246 *      page level allocator and map them into continguos kernel virtual space.
 247 */
 248void *vmalloc_32(unsigned long size)
 249{
 250        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 251}
 252
 253void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
 254{
 255        BUG();
 256        return NULL;
 257}
 258
 259void vunmap(void *addr)
 260{
 261        BUG();
 262}
 263
 264/*
 265 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 266 * have one.
 267 */
 268void  __attribute__((weak)) vmalloc_sync_all(void)
 269{
 270}
 271
 272/*
 273 *  sys_brk() for the most part doesn't need the global kernel
 274 *  lock, except when an application is doing something nasty
 275 *  like trying to un-brk an area that has already been mapped
 276 *  to a regular file.  in this case, the unmapping will need
 277 *  to invoke file system routines that need the global lock.
 278 */
 279asmlinkage unsigned long sys_brk(unsigned long brk)
 280{
 281        struct mm_struct *mm = current->mm;
 282
 283        if (brk < mm->start_brk || brk > mm->context.end_brk)
 284                return mm->brk;
 285
 286        if (mm->brk == brk)
 287                return mm->brk;
 288
 289        /*
 290         * Always allow shrinking brk
 291         */
 292        if (brk <= mm->brk) {
 293                mm->brk = brk;
 294                return brk;
 295        }
 296
 297        /*
 298         * Ok, looks good - let it rip.
 299         */
 300        return mm->brk = brk;
 301}
 302
 303#ifdef DEBUG
 304static void show_process_blocks(void)
 305{
 306        struct vm_list_struct *vml;
 307
 308        printk("Process blocks %d:", current->pid);
 309
 310        for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
 311                printk(" %p: %p", vml, vml->vma);
 312                if (vml->vma)
 313                        printk(" (%d @%lx #%d)",
 314                               kobjsize((void *) vml->vma->vm_start),
 315                               vml->vma->vm_start,
 316                               atomic_read(&vml->vma->vm_usage));
 317                printk(vml->next ? " ->" : ".\n");
 318        }
 319}
 320#endif /* DEBUG */
 321
 322/*
 323 * add a VMA into a process's mm_struct in the appropriate place in the list
 324 * - should be called with mm->mmap_sem held writelocked
 325 */
 326static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
 327{
 328        struct vm_list_struct **ppv;
 329
 330        for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
 331                if ((*ppv)->vma->vm_start > vml->vma->vm_start)
 332                        break;
 333
 334        vml->next = *ppv;
 335        *ppv = vml;
 336}
 337
 338/*
 339 * look up the first VMA in which addr resides, NULL if none
 340 * - should be called with mm->mmap_sem at least held readlocked
 341 */
 342struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 343{
 344        struct vm_list_struct *loop, *vml;
 345
 346        /* search the vm_start ordered list */
 347        vml = NULL;
 348        for (loop = mm->context.vmlist; loop; loop = loop->next) {
 349                if (loop->vma->vm_start > addr)
 350                        break;
 351                vml = loop;
 352        }
 353
 354        if (vml && vml->vma->vm_end > addr)
 355                return vml->vma;
 356
 357        return NULL;
 358}
 359EXPORT_SYMBOL(find_vma);
 360
 361/*
 362 * find a VMA
 363 * - we don't extend stack VMAs under NOMMU conditions
 364 */
 365struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
 366{
 367        return find_vma(mm, addr);
 368}
 369
 370/*
 371 * look up the first VMA exactly that exactly matches addr
 372 * - should be called with mm->mmap_sem at least held readlocked
 373 */
 374static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
 375                                                    unsigned long addr)
 376{
 377        struct vm_list_struct *vml;
 378
 379        /* search the vm_start ordered list */
 380        for (vml = mm->context.vmlist; vml; vml = vml->next) {
 381                if (vml->vma->vm_start == addr)
 382                        return vml->vma;
 383                if (vml->vma->vm_start > addr)
 384                        break;
 385        }
 386
 387        return NULL;
 388}
 389
 390/*
 391 * find a VMA in the global tree
 392 */
 393static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
 394{
 395        struct vm_area_struct *vma;
 396        struct rb_node *n = nommu_vma_tree.rb_node;
 397
 398        while (n) {
 399                vma = rb_entry(n, struct vm_area_struct, vm_rb);
 400
 401                if (start < vma->vm_start)
 402                        n = n->rb_left;
 403                else if (start > vma->vm_start)
 404                        n = n->rb_right;
 405                else
 406                        return vma;
 407        }
 408
 409        return NULL;
 410}
 411
 412/*
 413 * add a VMA in the global tree
 414 */
 415static void add_nommu_vma(struct vm_area_struct *vma)
 416{
 417        struct vm_area_struct *pvma;
 418        struct address_space *mapping;
 419        struct rb_node **p = &nommu_vma_tree.rb_node;
 420        struct rb_node *parent = NULL;
 421
 422        /* add the VMA to the mapping */
 423        if (vma->vm_file) {
 424                mapping = vma->vm_file->f_mapping;
 425
 426                flush_dcache_mmap_lock(mapping);
 427                vma_prio_tree_insert(vma, &mapping->i_mmap);
 428                flush_dcache_mmap_unlock(mapping);
 429        }
 430
 431        /* add the VMA to the master list */
 432        while (*p) {
 433                parent = *p;
 434                pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
 435
 436                if (vma->vm_start < pvma->vm_start) {
 437                        p = &(*p)->rb_left;
 438                }
 439                else if (vma->vm_start > pvma->vm_start) {
 440                        p = &(*p)->rb_right;
 441                }
 442                else {
 443                        /* mappings are at the same address - this can only
 444                         * happen for shared-mem chardevs and shared file
 445                         * mappings backed by ramfs/tmpfs */
 446                        BUG_ON(!(pvma->vm_flags & VM_SHARED));
 447
 448                        if (vma < pvma)
 449                                p = &(*p)->rb_left;
 450                        else if (vma > pvma)
 451                                p = &(*p)->rb_right;
 452                        else
 453                                BUG();
 454                }
 455        }
 456
 457        rb_link_node(&vma->vm_rb, parent, p);
 458        rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
 459}
 460
 461/*
 462 * delete a VMA from the global list
 463 */
 464static void delete_nommu_vma(struct vm_area_struct *vma)
 465{
 466        struct address_space *mapping;
 467
 468        /* remove the VMA from the mapping */
 469        if (vma->vm_file) {
 470                mapping = vma->vm_file->f_mapping;
 471
 472                flush_dcache_mmap_lock(mapping);
 473                vma_prio_tree_remove(vma, &mapping->i_mmap);
 474                flush_dcache_mmap_unlock(mapping);
 475        }
 476
 477        /* remove from the master list */
 478        rb_erase(&vma->vm_rb, &nommu_vma_tree);
 479}
 480
 481/*
 482 * determine whether a mapping should be permitted and, if so, what sort of
 483 * mapping we're capable of supporting
 484 */
 485static int validate_mmap_request(struct file *file,
 486                                 unsigned long addr,
 487                                 unsigned long len,
 488                                 unsigned long prot,
 489                                 unsigned long flags,
 490                                 unsigned long pgoff,
 491                                 unsigned long *_capabilities)
 492{
 493        unsigned long capabilities;
 494        unsigned long reqprot = prot;
 495        int ret;
 496
 497        /* do the simple checks first */
 498        if (flags & MAP_FIXED || addr) {
 499                printk(KERN_DEBUG
 500                       "%d: Can't do fixed-address/overlay mmap of RAM\n",
 501                       current->pid);
 502                return -EINVAL;
 503        }
 504
 505        if ((flags & MAP_TYPE) != MAP_PRIVATE &&
 506            (flags & MAP_TYPE) != MAP_SHARED)
 507                return -EINVAL;
 508
 509        if (!len)
 510                return -EINVAL;
 511
 512        /* Careful about overflows.. */
 513        len = PAGE_ALIGN(len);
 514        if (!len || len > TASK_SIZE)
 515                return -ENOMEM;
 516
 517        /* offset overflow? */
 518        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
 519                return -EOVERFLOW;
 520
 521        if (file) {
 522                /* validate file mapping requests */
 523                struct address_space *mapping;
 524
 525                /* files must support mmap */
 526                if (!file->f_op || !file->f_op->mmap)
 527                        return -ENODEV;
 528
 529                /* work out if what we've got could possibly be shared
 530                 * - we support chardevs that provide their own "memory"
 531                 * - we support files/blockdevs that are memory backed
 532                 */
 533                mapping = file->f_mapping;
 534                if (!mapping)
 535                        mapping = file->f_path.dentry->d_inode->i_mapping;
 536
 537                capabilities = 0;
 538                if (mapping && mapping->backing_dev_info)
 539                        capabilities = mapping->backing_dev_info->capabilities;
 540
 541                if (!capabilities) {
 542                        /* no explicit capabilities set, so assume some
 543                         * defaults */
 544                        switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
 545                        case S_IFREG:
 546                        case S_IFBLK:
 547                                capabilities = BDI_CAP_MAP_COPY;
 548                                break;
 549
 550                        case S_IFCHR:
 551                                capabilities =
 552                                        BDI_CAP_MAP_DIRECT |
 553                                        BDI_CAP_READ_MAP |
 554                                        BDI_CAP_WRITE_MAP;
 555                                break;
 556
 557                        default:
 558                                return -EINVAL;
 559                        }
 560                }
 561
 562                /* eliminate any capabilities that we can't support on this
 563                 * device */
 564                if (!file->f_op->get_unmapped_area)
 565                        capabilities &= ~BDI_CAP_MAP_DIRECT;
 566                if (!file->f_op->read)
 567                        capabilities &= ~BDI_CAP_MAP_COPY;
 568
 569                if (flags & MAP_SHARED) {
 570                        /* do checks for writing, appending and locking */
 571                        if ((prot & PROT_WRITE) &&
 572                            !(file->f_mode & FMODE_WRITE))
 573                                return -EACCES;
 574
 575                        if (IS_APPEND(file->f_path.dentry->d_inode) &&
 576                            (file->f_mode & FMODE_WRITE))
 577                                return -EACCES;
 578
 579                        if (locks_verify_locked(file->f_path.dentry->d_inode))
 580                                return -EAGAIN;
 581
 582                        if (!(capabilities & BDI_CAP_MAP_DIRECT))
 583                                return -ENODEV;
 584
 585                        if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
 586                            ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
 587                            ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
 588                            ) {
 589                                printk("MAP_SHARED not completely supported on !MMU\n");
 590                                return -EINVAL;
 591                        }
 592
 593                        /* we mustn't privatise shared mappings */
 594                        capabilities &= ~BDI_CAP_MAP_COPY;
 595                }
 596                else {
 597                        /* we're going to read the file into private memory we
 598                         * allocate */
 599                        if (!(capabilities & BDI_CAP_MAP_COPY))
 600                                return -ENODEV;
 601
 602                        /* we don't permit a private writable mapping to be
 603                         * shared with the backing device */
 604                        if (prot & PROT_WRITE)
 605                                capabilities &= ~BDI_CAP_MAP_DIRECT;
 606                }
 607
 608                /* handle executable mappings and implied executable
 609                 * mappings */
 610                if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
 611                        if (prot & PROT_EXEC)
 612                                return -EPERM;
 613                }
 614                else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
 615                        /* handle implication of PROT_EXEC by PROT_READ */
 616                        if (current->personality & READ_IMPLIES_EXEC) {
 617                                if (capabilities & BDI_CAP_EXEC_MAP)
 618                                        prot |= PROT_EXEC;
 619                        }
 620                }
 621                else if ((prot & PROT_READ) &&
 622                         (prot & PROT_EXEC) &&
 623                         !(capabilities & BDI_CAP_EXEC_MAP)
 624                         ) {
 625                        /* backing file is not executable, try to copy */
 626                        capabilities &= ~BDI_CAP_MAP_DIRECT;
 627                }
 628        }
 629        else {
 630                /* anonymous mappings are always memory backed and can be
 631                 * privately mapped
 632                 */
 633                capabilities = BDI_CAP_MAP_COPY;
 634
 635                /* handle PROT_EXEC implication by PROT_READ */
 636                if ((prot & PROT_READ) &&
 637                    (current->personality & READ_IMPLIES_EXEC))
 638                        prot |= PROT_EXEC;
 639        }
 640
 641        /* allow the security API to have its say */
 642        ret = security_file_mmap(file, reqprot, prot, flags);
 643        if (ret < 0)
 644                return ret;
 645
 646        /* looks okay */
 647        *_capabilities = capabilities;
 648        return 0;
 649}
 650
 651/*
 652 * we've determined that we can make the mapping, now translate what we
 653 * now know into VMA flags
 654 */
 655static unsigned long determine_vm_flags(struct file *file,
 656                                        unsigned long prot,
 657                                        unsigned long flags,
 658                                        unsigned long capabilities)
 659{
 660        unsigned long vm_flags;
 661
 662        vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
 663        vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 664        /* vm_flags |= mm->def_flags; */
 665
 666        if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
 667                /* attempt to share read-only copies of mapped file chunks */
 668                if (file && !(prot & PROT_WRITE))
 669                        vm_flags |= VM_MAYSHARE;
 670        }
 671        else {
 672                /* overlay a shareable mapping on the backing device or inode
 673                 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
 674                 * romfs/cramfs */
 675                if (flags & MAP_SHARED)
 676                        vm_flags |= VM_MAYSHARE | VM_SHARED;
 677                else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
 678                        vm_flags |= VM_MAYSHARE;
 679        }
 680
 681        /* refuse to let anyone share private mappings with this process if
 682         * it's being traced - otherwise breakpoints set in it may interfere
 683         * with another untraced process
 684         */
 685        if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
 686                vm_flags &= ~VM_MAYSHARE;
 687
 688        return vm_flags;
 689}
 690
 691/*
 692 * set up a shared mapping on a file
 693 */
 694static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
 695{
 696        int ret;
 697
 698        ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
 699        if (ret != -ENOSYS)
 700                return ret;
 701
 702        /* getting an ENOSYS error indicates that direct mmap isn't
 703         * possible (as opposed to tried but failed) so we'll fall
 704         * through to making a private copy of the data and mapping
 705         * that if we can */
 706        return -ENODEV;
 707}
 708
 709/*
 710 * set up a private mapping or an anonymous shared mapping
 711 */
 712static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
 713{
 714        void *base;
 715        int ret;
 716
 717        /* invoke the file's mapping function so that it can keep track of
 718         * shared mappings on devices or memory
 719         * - VM_MAYSHARE will be set if it may attempt to share
 720         */
 721        if (vma->vm_file) {
 722                ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
 723                if (ret != -ENOSYS) {
 724                        /* shouldn't return success if we're not sharing */
 725                        BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
 726                        return ret; /* success or a real error */
 727                }
 728
 729                /* getting an ENOSYS error indicates that direct mmap isn't
 730                 * possible (as opposed to tried but failed) so we'll try to
 731                 * make a private copy of the data and map that instead */
 732        }
 733
 734        /* allocate some memory to hold the mapping
 735         * - note that this may not return a page-aligned address if the object
 736         *   we're allocating is smaller than a page
 737         */
 738        base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
 739        if (!base)
 740                goto enomem;
 741
 742        vma->vm_start = (unsigned long) base;
 743        vma->vm_end = vma->vm_start + len;
 744        vma->vm_flags |= VM_MAPPED_COPY;
 745
 746#ifdef WARN_ON_SLACK
 747        if (len + WARN_ON_SLACK <= kobjsize(result))
 748                printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
 749                       len, current->pid, kobjsize(result) - len);
 750#endif
 751
 752        if (vma->vm_file) {
 753                /* read the contents of a file into the copy */
 754                mm_segment_t old_fs;
 755                loff_t fpos;
 756
 757                fpos = vma->vm_pgoff;
 758                fpos <<= PAGE_SHIFT;
 759
 760                old_fs = get_fs();
 761                set_fs(KERNEL_DS);
 762                ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
 763                set_fs(old_fs);
 764
 765                if (ret < 0)
 766                        goto error_free;
 767
 768                /* clear the last little bit */
 769                if (ret < len)
 770                        memset(base + ret, 0, len - ret);
 771
 772        } else {
 773                /* if it's an anonymous mapping, then just clear it */
 774                memset(base, 0, len);
 775        }
 776
 777        return 0;
 778
 779error_free:
 780        kfree(base);
 781        vma->vm_start = 0;
 782        return ret;
 783
 784enomem:
 785        printk("Allocation of length %lu from process %d failed\n",
 786               len, current->pid);
 787        show_free_areas();
 788        return -ENOMEM;
 789}
 790
 791/*
 792 * handle mapping creation for uClinux
 793 */
 794unsigned long do_mmap_pgoff(struct file *file,
 795                            unsigned long addr,
 796                            unsigned long len,
 797                            unsigned long prot,
 798                            unsigned long flags,
 799                            unsigned long pgoff)
 800{
 801        struct vm_list_struct *vml = NULL;
 802        struct vm_area_struct *vma = NULL;
 803        struct rb_node *rb;
 804        unsigned long capabilities, vm_flags;
 805        void *result;
 806        int ret;
 807
 808        /* decide whether we should attempt the mapping, and if so what sort of
 809         * mapping */
 810        ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
 811                                    &capabilities);
 812        if (ret < 0)
 813                return ret;
 814
 815        /* we've determined that we can make the mapping, now translate what we
 816         * now know into VMA flags */
 817        vm_flags = determine_vm_flags(file, prot, flags, capabilities);
 818
 819        /* we're going to need to record the mapping if it works */
 820        vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
 821        if (!vml)
 822                goto error_getting_vml;
 823
 824        down_write(&nommu_vma_sem);
 825
 826        /* if we want to share, we need to check for VMAs created by other
 827         * mmap() calls that overlap with our proposed mapping
 828         * - we can only share with an exact match on most regular files
 829         * - shared mappings on character devices and memory backed files are
 830         *   permitted to overlap inexactly as far as we are concerned for in
 831         *   these cases, sharing is handled in the driver or filesystem rather
 832         *   than here
 833         */
 834        if (vm_flags & VM_MAYSHARE) {
 835                unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 836                unsigned long vmpglen;
 837
 838                /* suppress VMA sharing for shared regions */
 839                if (vm_flags & VM_SHARED &&
 840                    capabilities & BDI_CAP_MAP_DIRECT)
 841                        goto dont_share_VMAs;
 842
 843                for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
 844                        vma = rb_entry(rb, struct vm_area_struct, vm_rb);
 845
 846                        if (!(vma->vm_flags & VM_MAYSHARE))
 847                                continue;
 848
 849                        /* search for overlapping mappings on the same file */
 850                        if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
 851                                continue;
 852
 853                        if (vma->vm_pgoff >= pgoff + pglen)
 854                                continue;
 855
 856                        vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
 857                        vmpglen >>= PAGE_SHIFT;
 858                        if (pgoff >= vma->vm_pgoff + vmpglen)
 859                                continue;
 860
 861                        /* handle inexactly overlapping matches between mappings */
 862                        if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
 863                                if (!(capabilities & BDI_CAP_MAP_DIRECT))
 864                                        goto sharing_violation;
 865                                continue;
 866                        }
 867
 868                        /* we've found a VMA we can share */
 869                        atomic_inc(&vma->vm_usage);
 870
 871                        vml->vma = vma;
 872                        result = (void *) vma->vm_start;
 873                        goto shared;
 874                }
 875
 876        dont_share_VMAs:
 877                vma = NULL;
 878
 879                /* obtain the address at which to make a shared mapping
 880                 * - this is the hook for quasi-memory character devices to
 881                 *   tell us the location of a shared mapping
 882                 */
 883                if (file && file->f_op->get_unmapped_area) {
 884                        addr = file->f_op->get_unmapped_area(file, addr, len,
 885                                                             pgoff, flags);
 886                        if (IS_ERR((void *) addr)) {
 887                                ret = addr;
 888                                if (ret != (unsigned long) -ENOSYS)
 889                                        goto error;
 890
 891                                /* the driver refused to tell us where to site
 892                                 * the mapping so we'll have to attempt to copy
 893                                 * it */
 894                                ret = (unsigned long) -ENODEV;
 895                                if (!(capabilities & BDI_CAP_MAP_COPY))
 896                                        goto error;
 897
 898                                capabilities &= ~BDI_CAP_MAP_DIRECT;
 899                        }
 900                }
 901        }
 902
 903        /* we're going to need a VMA struct as well */
 904        vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
 905        if (!vma)
 906                goto error_getting_vma;
 907
 908        INIT_LIST_HEAD(&vma->anon_vma_node);
 909        atomic_set(&vma->vm_usage, 1);
 910        if (file)
 911                get_file(file);
 912        vma->vm_file    = file;
 913        vma->vm_flags   = vm_flags;
 914        vma->vm_start   = addr;
 915        vma->vm_end     = addr + len;
 916        vma->vm_pgoff   = pgoff;
 917
 918        vml->vma = vma;
 919
 920        /* set up the mapping */
 921        if (file && vma->vm_flags & VM_SHARED)
 922                ret = do_mmap_shared_file(vma, len);
 923        else
 924                ret = do_mmap_private(vma, len);
 925        if (ret < 0)
 926                goto error;
 927
 928        /* okay... we have a mapping; now we have to register it */
 929        result = (void *) vma->vm_start;
 930
 931        if (vma->vm_flags & VM_MAPPED_COPY) {
 932                realalloc += kobjsize(result);
 933                askedalloc += len;
 934        }
 935
 936        realalloc += kobjsize(vma);
 937        askedalloc += sizeof(*vma);
 938
 939        current->mm->total_vm += len >> PAGE_SHIFT;
 940
 941        add_nommu_vma(vma);
 942
 943 shared:
 944        realalloc += kobjsize(vml);
 945        askedalloc += sizeof(*vml);
 946
 947        add_vma_to_mm(current->mm, vml);
 948
 949        up_write(&nommu_vma_sem);
 950
 951        if (prot & PROT_EXEC)
 952                flush_icache_range((unsigned long) result,
 953                                   (unsigned long) result + len);
 954
 955#ifdef DEBUG
 956        printk("do_mmap:\n");
 957        show_process_blocks();
 958#endif
 959
 960        return (unsigned long) result;
 961
 962 error:
 963        up_write(&nommu_vma_sem);
 964        kfree(vml);
 965        if (vma) {
 966                if (vma->vm_file)
 967                        fput(vma->vm_file);
 968                kfree(vma);
 969        }
 970        return ret;
 971
 972 sharing_violation:
 973        up_write(&nommu_vma_sem);
 974        printk("Attempt to share mismatched mappings\n");
 975        kfree(vml);
 976        return -EINVAL;
 977
 978 error_getting_vma:
 979        up_write(&nommu_vma_sem);
 980        kfree(vml);
 981        printk("Allocation of vma for %lu byte allocation from process %d failed\n",
 982               len, current->pid);
 983        show_free_areas();
 984        return -ENOMEM;
 985
 986 error_getting_vml:
 987        printk("Allocation of vml for %lu byte allocation from process %d failed\n",
 988               len, current->pid);
 989        show_free_areas();
 990        return -ENOMEM;
 991}
 992
 993/*
 994 * handle mapping disposal for uClinux
 995 */
 996static void put_vma(struct vm_area_struct *vma)
 997{
 998        if (vma) {
 999                down_write(&nommu_vma_sem);
1000
1001                if (atomic_dec_and_test(&vma->vm_usage)) {
1002                        delete_nommu_vma(vma);
1003
1004                        if (vma->vm_ops && vma->vm_ops->close)
1005                                vma->vm_ops->close(vma);
1006
1007                        /* IO memory and memory shared directly out of the pagecache from
1008                         * ramfs/tmpfs mustn't be released here */
1009                        if (vma->vm_flags & VM_MAPPED_COPY) {
1010                                realalloc -= kobjsize((void *) vma->vm_start);
1011                                askedalloc -= vma->vm_end - vma->vm_start;
1012                                kfree((void *) vma->vm_start);
1013                        }
1014
1015                        realalloc -= kobjsize(vma);
1016                        askedalloc -= sizeof(*vma);
1017
1018                        if (vma->vm_file)
1019                                fput(vma->vm_file);
1020                        kfree(vma);
1021                }
1022
1023                up_write(&nommu_vma_sem);
1024        }
1025}
1026
1027/*
1028 * release a mapping
1029 * - under NOMMU conditions the parameters must match exactly to the mapping to
1030 *   be removed
1031 */
1032int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
1033{
1034        struct vm_list_struct *vml, **parent;
1035        unsigned long end = addr + len;
1036
1037#ifdef DEBUG
1038        printk("do_munmap:\n");
1039#endif
1040
1041        for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
1042                if ((*parent)->vma->vm_start > addr)
1043                        break;
1044                if ((*parent)->vma->vm_start == addr &&
1045                    ((len == 0) || ((*parent)->vma->vm_end == end)))
1046                        goto found;
1047        }
1048
1049        printk("munmap of non-mmaped memory by process %d (%s): %p\n",
1050               current->pid, current->comm, (void *) addr);
1051        return -EINVAL;
1052
1053 found:
1054        vml = *parent;
1055
1056        put_vma(vml->vma);
1057
1058        *parent = vml->next;
1059        realalloc -= kobjsize(vml);
1060        askedalloc -= sizeof(*vml);
1061        kfree(vml);
1062
1063        update_hiwater_vm(mm);
1064        mm->total_vm -= len >> PAGE_SHIFT;
1065
1066#ifdef DEBUG
1067        show_process_blocks();
1068#endif
1069
1070        return 0;
1071}
1072
1073asmlinkage long sys_munmap(unsigned long addr, size_t len)
1074{
1075        int ret;
1076        struct mm_struct *mm = current->mm;
1077
1078        down_write(&mm->mmap_sem);
1079        ret = do_munmap(mm, addr, len);
1080        up_write(&mm->mmap_sem);
1081        return ret;
1082}
1083
1084/*
1085 * Release all mappings
1086 */
1087void exit_mmap(struct mm_struct * mm)
1088{
1089        struct vm_list_struct *tmp;
1090
1091        if (mm) {
1092#ifdef DEBUG
1093                printk("Exit_mmap:\n");
1094#endif
1095
1096                mm->total_vm = 0;
1097
1098                while ((tmp = mm->context.vmlist)) {
1099                        mm->context.vmlist = tmp->next;
1100                        put_vma(tmp->vma);
1101
1102                        realalloc -= kobjsize(tmp);
1103                        askedalloc -= sizeof(*tmp);
1104                        kfree(tmp);
1105                }
1106
1107#ifdef DEBUG
1108                show_process_blocks();
1109#endif
1110        }
1111}
1112
/*
 * brk-style expansion stub
 * - always fails with -ENOMEM; both arguments are ignored
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	return (unsigned long) -ENOMEM;
}
1117
1118/*
1119 * expand (or shrink) an existing mapping, potentially moving it at the same
1120 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1121 *
1122 * under NOMMU conditions, we only permit changing a mapping's size, and only
1123 * as long as it stays within the hole allocated by the kmalloc() call in
1124 * do_mmap_pgoff() and the block is not shareable
1125 *
1126 * MREMAP_FIXED is not supported under NOMMU conditions
1127 */
1128unsigned long do_mremap(unsigned long addr,
1129                        unsigned long old_len, unsigned long new_len,
1130                        unsigned long flags, unsigned long new_addr)
1131{
1132        struct vm_area_struct *vma;
1133
1134        /* insanity checks first */
1135        if (new_len == 0)
1136                return (unsigned long) -EINVAL;
1137
1138        if (flags & MREMAP_FIXED && new_addr != addr)
1139                return (unsigned long) -EINVAL;
1140
1141        vma = find_vma_exact(current->mm, addr);
1142        if (!vma)
1143                return (unsigned long) -EINVAL;
1144
1145        if (vma->vm_end != vma->vm_start + old_len)
1146                return (unsigned long) -EFAULT;
1147
1148        if (vma->vm_flags & VM_MAYSHARE)
1149                return (unsigned long) -EPERM;
1150
1151        if (new_len > kobjsize((void *) addr))
1152                return (unsigned long) -ENOMEM;
1153
1154        /* all checks complete - do it */
1155        vma->vm_end = vma->vm_start + new_len;
1156
1157        askedalloc -= old_len;
1158        askedalloc += new_len;
1159
1160        return vma->vm_start;
1161}
1162
1163asmlinkage unsigned long sys_mremap(unsigned long addr,
1164        unsigned long old_len, unsigned long new_len,
1165        unsigned long flags, unsigned long new_addr)
1166{
1167        unsigned long ret;
1168
1169        down_write(&current->mm->mmap_sem);
1170        ret = do_mremap(addr, old_len, new_len, flags, new_addr);
1171        up_write(&current->mm->mmap_sem);
1172        return ret;
1173}
1174
/*
 * page lookup stub
 * - always returns NULL; none of the arguments are examined
 */
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
			 unsigned int foll_flags)
{
	struct page *none = NULL;

	return none;
}
1180
1181int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1182                unsigned long to, unsigned long size, pgprot_t prot)
1183{
1184        vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
1185        return 0;
1186}
1187EXPORT_SYMBOL(remap_pfn_range);
1188
/*
 * swap unplug hook stub
 * - deliberately empty: does nothing with either argument
 */
void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
	/* nothing to do */
}
1192
/*
 * arch hook for finding an unmapped area
 * - always fails with -ENOMEM; none of the arguments are examined
 */
unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
	unsigned long len, unsigned long pgoff, unsigned long flags)
{
	return (unsigned long) -ENOMEM;
}
1198
/*
 * arch hook called when an area is unmapped
 * - deliberately empty: does nothing with either argument
 */
void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
	/* nothing to do */
}
1202
1203void unmap_mapping_range(struct address_space *mapping,
1204                         loff_t const holebegin, loff_t const holelen,
1205                         int even_cows)
1206{
1207}
1208EXPORT_SYMBOL(unmap_mapping_range);
1209
1210/*
1211 * ask for an unmapped area at which to create a mapping on a file
1212 */
1213unsigned long get_unmapped_area(struct file *file, unsigned long addr,
1214                                unsigned long len, unsigned long pgoff,
1215                                unsigned long flags)
1216{
1217        unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
1218                                  unsigned long, unsigned long);
1219
1220        get_area = current->mm->get_unmapped_area;
1221        if (file && file->f_op && file->f_op->get_unmapped_area)
1222                get_area = file->f_op->get_unmapped_area;
1223
1224        if (!get_area)
1225                return -ENOSYS;
1226
1227        return get_area(file, addr, len, pgoff, flags);
1228}
1229
1230EXPORT_SYMBOL(get_unmapped_area);
1231
1232/*
1233 * Check that a process has enough memory to allocate a new virtual
1234 * mapping. 0 means there is enough memory for the allocation to
1235 * succeed and -ENOMEM implies there is not.
1236 *
1237 * We currently support three overcommit policies, which are set via the
1238 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
1239 *
1240 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
1241 * Additional code 2002 Jul 20 by Robert Love.
1242 *
1243 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
1244 *
1245 * Note this is a helper function intended to be used by LSMs which
1246 * wish to use this logic.
1247 */
1248int __vm_enough_memory(long pages, int cap_sys_admin)
1249{
1250        unsigned long free, allowed;
1251
1252        vm_acct_memory(pages);
1253
1254        /*
1255         * Sometimes we want to use more memory than we have
1256         */
1257        if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1258                return 0;
1259
1260        if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1261                unsigned long n;
1262
1263                free = global_page_state(NR_FILE_PAGES);
1264                free += nr_swap_pages;
1265
1266                /*
1267                 * Any slabs which are created with the
1268                 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1269                 * which are reclaimable, under pressure.  The dentry
1270                 * cache and most inode caches should fall into this
1271                 */
1272                free += global_page_state(NR_SLAB_RECLAIMABLE);
1273
1274                /*
1275                 * Leave the last 3% for root
1276                 */
1277                if (!cap_sys_admin)
1278                        free -= free / 32;
1279
1280                if (free > pages)
1281                        return 0;
1282
1283                /*
1284                 * nr_free_pages() is very expensive on large systems,
1285                 * only call if we're about to fail.
1286                 */
1287                n = nr_free_pages();
1288
1289                /*
1290                 * Leave reserved pages. The pages are not for anonymous pages.
1291                 */
1292                if (n <= totalreserve_pages)
1293                        goto error;
1294                else
1295                        n -= totalreserve_pages;
1296
1297                /*
1298                 * Leave the last 3% for root
1299                 */
1300                if (!cap_sys_admin)
1301                        n -= n / 32;
1302                free += n;
1303
1304                if (free > pages)
1305                        return 0;
1306
1307                goto error;
1308        }
1309
1310        allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1311        /*
1312         * Leave the last 3% for root
1313         */
1314        if (!cap_sys_admin)
1315                allowed -= allowed / 32;
1316        allowed += total_swap_pages;
1317
1318        /* Don't let a single process grow too big:
1319           leave 3% of the size of this process for other processes */
1320        allowed -= current->mm->total_vm / 32;
1321
1322        /*
1323         * cast `allowed' as a signed long because vm_committed_space
1324         * sometimes has a negative value
1325         */
1326        if (atomic_read(&vm_committed_space) < (long)allowed)
1327                return 0;
1328error:
1329        vm_unacct_memory(pages);
1330
1331        return -ENOMEM;
1332}
1333
/*
 * gate area test stub
 * - always returns 0, whatever the address
 */
int in_gate_area_no_task(unsigned long addr)
{
	const int in_gate = 0;

	return in_gate;
}
1338
1339struct page *filemap_nopage(struct vm_area_struct *area,
1340                        unsigned long address, int *type)
1341{
1342        BUG();
1343        return NULL;
1344}
1345
1346/*
1347 * Access another process' address space.
1348 * - source/target buffer must be kernel space
1349 */
1350int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
1351{
1352        struct vm_area_struct *vma;
1353        struct mm_struct *mm;
1354
1355        if (addr + len < addr)
1356                return 0;
1357
1358        mm = get_task_mm(tsk);
1359        if (!mm)
1360                return 0;
1361
1362        down_read(&mm->mmap_sem);
1363
1364        /* the access must start within one of the target process's mappings */
1365        vma = find_vma(mm, addr);
1366        if (vma) {
1367                /* don't overrun this mapping */
1368                if (addr + len >= vma->vm_end)
1369                        len = vma->vm_end - addr;
1370
1371                /* only read or write mappings where it is permitted */
1372                if (write && vma->vm_flags & VM_MAYWRITE)
1373                        len -= copy_to_user((void *) addr, buf, len);
1374                else if (!write && vma->vm_flags & VM_MAYREAD)
1375                        len -= copy_from_user(buf, (void *) addr, len);
1376                else
1377                        len = 0;
1378        } else {
1379                len = 0;
1380        }
1381
1382        up_read(&mm->mmap_sem);
1383        mmput(mm);
1384        return len;
1385}
1386
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.