linux/mm/fremap.c
<<
>>
Prefs
   1/*
   2 *   linux/mm/fremap.c
   3 * 
   4 * Explicit pagetable population and nonlinear (random) mappings support.
   5 *
   6 * started by Ingo Molnar, Copyright (C) 2002, 2003
   7 */
   8
   9#include <linux/mm.h>
  10#include <linux/swap.h>
  11#include <linux/file.h>
  12#include <linux/mman.h>
  13#include <linux/pagemap.h>
  14#include <linux/swapops.h>
  15#include <linux/rmap.h>
  16#include <linux/module.h>
  17#include <linux/syscalls.h>
  18
  19#include <asm/mmu_context.h>
  20#include <asm/cacheflush.h>
  21#include <asm/tlbflush.h>
  22
  23static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
  24                        unsigned long addr, pte_t *ptep)
  25{
  26        pte_t pte = *ptep;
  27        struct page *page = NULL;
  28
  29        if (pte_present(pte)) {
  30                flush_cache_page(vma, addr, pte_pfn(pte));
  31                pte = ptep_clear_flush(vma, addr, ptep);
  32                page = vm_normal_page(vma, addr, pte);
  33                if (page) {
  34                        if (pte_dirty(pte))
  35                                set_page_dirty(page);
  36                        page_remove_rmap(page, vma);
  37                        page_cache_release(page);
  38                }
  39        } else {
  40                if (!pte_file(pte))
  41                        free_swap_and_cache(pte_to_swp_entry(pte));
  42                pte_clear_not_present_full(mm, addr, ptep, 0);
  43        }
  44        return !!page;
  45}
  46
  47/*
  48 * Install a file page to a given virtual memory address, release any
  49 * previously existing mapping.
  50 */
  51int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
  52                unsigned long addr, struct page *page, pgprot_t prot)
  53{
  54        struct inode *inode;
  55        pgoff_t size;
  56        int err = -ENOMEM;
  57        pte_t *pte;
  58        pte_t pte_val;
  59        spinlock_t *ptl;
  60
  61        pte = get_locked_pte(mm, addr, &ptl);
  62        if (!pte)
  63                goto out;
  64
  65        /*
  66         * This page may have been truncated. Tell the
  67         * caller about it.
  68         */
  69        err = -EINVAL;
  70        inode = vma->vm_file->f_mapping->host;
  71        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  72        if (!page->mapping || page->index >= size)
  73                goto unlock;
  74        err = -ENOMEM;
  75        if (page_mapcount(page) > INT_MAX/2)
  76                goto unlock;
  77
  78        if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
  79                inc_mm_counter(mm, file_rss);
  80
  81        flush_icache_page(vma, page);
  82        pte_val = mk_pte(page, prot);
  83        set_pte_at(mm, addr, pte, pte_val);
  84        page_add_file_rmap(page);
  85        update_mmu_cache(vma, addr, pte_val);
  86        lazy_mmu_prot_update(pte_val);
  87        err = 0;
  88unlock:
  89        pte_unmap_unlock(pte, ptl);
  90out:
  91        return err;
  92}
  93EXPORT_SYMBOL(install_page);
  94
  95/*
  96 * Install a file pte to a given virtual memory address, release any
  97 * previously existing mapping.
  98 */
  99int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 100                unsigned long addr, unsigned long pgoff, pgprot_t prot)
 101{
 102        int err = -ENOMEM;
 103        pte_t *pte;
 104        spinlock_t *ptl;
 105
 106        pte = get_locked_pte(mm, addr, &ptl);
 107        if (!pte)
 108                goto out;
 109
 110        if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
 111                update_hiwater_rss(mm);
 112                dec_mm_counter(mm, file_rss);
 113        }
 114
 115        set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
 116        /*
 117         * We don't need to run update_mmu_cache() here because the "file pte"
 118         * being installed by install_file_pte() is not a real pte - it's a
 119         * non-present entry (like a swap entry), noting what file offset should
 120         * be mapped there when there's a fault (in a non-linear vma where
 121         * that's not obvious).
 122         */
 123        pte_unmap_unlock(pte, ptl);
 124        err = 0;
 125out:
 126        return err;
 127}
 128
 129/***
 130 * sys_remap_file_pages - remap arbitrary pages of a shared backing store
 131 *                        file within an existing vma.
 132 * @start: start of the remapped virtual memory range
 133 * @size: size of the remapped virtual memory range
 134 * @prot: new protection bits of the range
 135 * @pgoff: to be mapped page of the backing store file
 136 * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
 137 *
 138 * this syscall works purely via pagetables, so it's the most efficient
 139 * way to map the same (large) file into a given virtual window. Unlike
 140 * mmap()/mremap() it does not create any new vmas. The new mappings are
 141 * also safe across swapout.
 142 *
 143 * NOTE: the 'prot' parameter right now is ignored, and the vma's default
 144 * protection is used. Arbitrary protections might be implemented in the
 145 * future.
 146 */
 147asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 148        unsigned long __prot, unsigned long pgoff, unsigned long flags)
 149{
 150        struct mm_struct *mm = current->mm;
 151        struct address_space *mapping;
 152        unsigned long end = start + size;
 153        struct vm_area_struct *vma;
 154        int err = -EINVAL;
 155        int has_write_lock = 0;
 156
 157        if (__prot)
 158                return err;
 159        /*
 160         * Sanitize the syscall parameters:
 161         */
 162        start = start & PAGE_MASK;
 163        size = size & PAGE_MASK;
 164
 165        /* Does the address range wrap, or is the span zero-sized? */
 166        if (start + size <= start)
 167                return err;
 168
 169        /* Can we represent this offset inside this architecture's pte's? */
 170#if PTE_FILE_MAX_BITS < BITS_PER_LONG
 171        if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
 172                return err;
 173#endif
 174
 175        /* We need down_write() to change vma->vm_flags. */
 176        down_read(&mm->mmap_sem);
 177 retry:
 178        vma = find_vma(mm, start);
 179
 180        /*
 181         * Make sure the vma is shared, that it supports prefaulting,
 182         * and that the remapped range is valid and fully within
 183         * the single existing vma.  vm_private_data is used as a
 184         * swapout cursor in a VM_NONLINEAR vma.
 185         */
 186        if (vma && (vma->vm_flags & VM_SHARED) &&
 187                (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
 188                vma->vm_ops && vma->vm_ops->populate &&
 189                        end > start && start >= vma->vm_start &&
 190                                end <= vma->vm_end) {
 191
 192                /* Must set VM_NONLINEAR before any pages are populated. */
 193                if (pgoff != linear_page_index(vma, start) &&
 194                    !(vma->vm_flags & VM_NONLINEAR)) {
 195                        if (!has_write_lock) {
 196                                up_read(&mm->mmap_sem);
 197                                down_write(&mm->mmap_sem);
 198                                has_write_lock = 1;
 199                                goto retry;
 200                        }
 201                        mapping = vma->vm_file->f_mapping;
 202                        spin_lock(&mapping->i_mmap_lock);
 203                        flush_dcache_mmap_lock(mapping);
 204                        vma->vm_flags |= VM_NONLINEAR;
 205                        vma_prio_tree_remove(vma, &mapping->i_mmap);
 206                        vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 207                        flush_dcache_mmap_unlock(mapping);
 208                        spin_unlock(&mapping->i_mmap_lock);
 209                }
 210
 211                err = vma->vm_ops->populate(vma, start, size,
 212                                            vma->vm_page_prot,
 213                                            pgoff, flags & MAP_NONBLOCK);
 214
 215                /*
 216                 * We can't clear VM_NONLINEAR because we'd have to do
 217                 * it after ->populate completes, and that would prevent
 218                 * downgrading the lock.  (Locks can't be upgraded).
 219                 */
 220        }
 221        if (likely(!has_write_lock))
 222                up_read(&mm->mmap_sem);
 223        else
 224                up_write(&mm->mmap_sem);
 225
 226        return err;
 227}
 228
 229
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.