linux/mm/filemap_xip.c
<<
>>
Prefs
   1/*
   2 *      linux/mm/filemap_xip.c
   3 *
   4 * Copyright (C) 2005 IBM Corporation
   5 * Author: Carsten Otte <cotte@de.ibm.com>
   6 *
   7 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
   8 *
   9 */
  10
  11#include <linux/fs.h>
  12#include <linux/pagemap.h>
  13#include <linux/module.h>
  14#include <linux/uio.h>
  15#include <linux/rmap.h>
  16#include <linux/sched.h>
  17#include <asm/tlbflush.h>
  18#include "filemap.h"
  19
  20/*
  21 * We do use our own empty page to avoid interference with other users
  22 * of ZERO_PAGE(), such as /dev/zero
  23 */
  24static struct page *__xip_sparse_page;
  25
  26static struct page *xip_sparse_page(void)
  27{
  28        if (!__xip_sparse_page) {
  29                unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
  30                if (zeroes) {
  31                        static DEFINE_SPINLOCK(xip_alloc_lock);
  32                        spin_lock(&xip_alloc_lock);
  33                        if (!__xip_sparse_page)
  34                                __xip_sparse_page = virt_to_page(zeroes);
  35                        else
  36                                free_page(zeroes);
  37                        spin_unlock(&xip_alloc_lock);
  38                }
  39        }
  40        return __xip_sparse_page;
  41}
  42
  43/*
  44 * This is a file read routine for execute in place files, and uses
  45 * the mapping->a_ops->get_xip_page() function for the actual low-level
  46 * stuff.
  47 *
  48 * Note the struct file* is not used at all.  It may be NULL.
  49 */
  50static void
  51do_xip_mapping_read(struct address_space *mapping,
  52                    struct file_ra_state *_ra,
  53                    struct file *filp,
  54                    loff_t *ppos,
  55                    read_descriptor_t *desc,
  56                    read_actor_t actor)
  57{
  58        struct inode *inode = mapping->host;
  59        unsigned long index, end_index, offset;
  60        loff_t isize;
  61
  62        BUG_ON(!mapping->a_ops->get_xip_page);
  63
  64        index = *ppos >> PAGE_CACHE_SHIFT;
  65        offset = *ppos & ~PAGE_CACHE_MASK;
  66
  67        isize = i_size_read(inode);
  68        if (!isize)
  69                goto out;
  70
  71        end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
  72        for (;;) {
  73                struct page *page;
  74                unsigned long nr, ret;
  75
  76                /* nr is the maximum number of bytes to copy from this page */
  77                nr = PAGE_CACHE_SIZE;
  78                if (index >= end_index) {
  79                        if (index > end_index)
  80                                goto out;
  81                        nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
  82                        if (nr <= offset) {
  83                                goto out;
  84                        }
  85                }
  86                nr = nr - offset;
  87
  88                page = mapping->a_ops->get_xip_page(mapping,
  89                        index*(PAGE_SIZE/512), 0);
  90                if (!page)
  91                        goto no_xip_page;
  92                if (unlikely(IS_ERR(page))) {
  93                        if (PTR_ERR(page) == -ENODATA) {
  94                                /* sparse */
  95                                page = ZERO_PAGE(0);
  96                        } else {
  97                                desc->error = PTR_ERR(page);
  98                                goto out;
  99                        }
 100                }
 101
 102                /* If users can be writing to this page using arbitrary
 103                 * virtual addresses, take care about potential aliasing
 104                 * before reading the page on the kernel side.
 105                 */
 106                if (mapping_writably_mapped(mapping))
 107                        flush_dcache_page(page);
 108
 109                /*
 110                 * Ok, we have the page, so now we can copy it to user space...
 111                 *
 112                 * The actor routine returns how many bytes were actually used..
 113                 * NOTE! This may not be the same as how much of a user buffer
 114                 * we filled up (we may be padding etc), so we can only update
 115                 * "pos" here (the actor routine has to update the user buffer
 116                 * pointers and the remaining count).
 117                 */
 118                ret = actor(desc, page, offset, nr);
 119                offset += ret;
 120                index += offset >> PAGE_CACHE_SHIFT;
 121                offset &= ~PAGE_CACHE_MASK;
 122
 123                if (ret == nr && desc->count)
 124                        continue;
 125                goto out;
 126
 127no_xip_page:
 128                /* Did not get the page. Report it */
 129                desc->error = -EIO;
 130                goto out;
 131        }
 132
 133out:
 134        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 135        if (filp)
 136                file_accessed(filp);
 137}
 138
 139ssize_t
 140xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 141{
 142        read_descriptor_t desc;
 143
 144        if (!access_ok(VERIFY_WRITE, buf, len))
 145                return -EFAULT;
 146
 147        desc.written = 0;
 148        desc.arg.buf = buf;
 149        desc.count = len;
 150        desc.error = 0;
 151
 152        do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
 153                            ppos, &desc, file_read_actor);
 154
 155        if (desc.written)
 156                return desc.written;
 157        else
 158                return desc.error;
 159}
 160EXPORT_SYMBOL_GPL(xip_file_read);
 161
 162ssize_t
 163xip_file_sendfile(struct file *in_file, loff_t *ppos,
 164             size_t count, read_actor_t actor, void *target)
 165{
 166        read_descriptor_t desc;
 167
 168        if (!count)
 169                return 0;
 170
 171        desc.written = 0;
 172        desc.count = count;
 173        desc.arg.data = target;
 174        desc.error = 0;
 175
 176        do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
 177                            ppos, &desc, actor);
 178        if (desc.written)
 179                return desc.written;
 180        return desc.error;
 181}
 182EXPORT_SYMBOL_GPL(xip_file_sendfile);
 183
 184/*
 185 * __xip_unmap is invoked from xip_unmap and
 186 * xip_write
 187 *
 188 * This function walks all vmas of the address_space and unmaps the
 189 * __xip_sparse_page when found at pgoff.
 190 */
 191static void
 192__xip_unmap (struct address_space * mapping,
 193                     unsigned long pgoff)
 194{
 195        struct vm_area_struct *vma;
 196        struct mm_struct *mm;
 197        struct prio_tree_iter iter;
 198        unsigned long address;
 199        pte_t *pte;
 200        pte_t pteval;
 201        spinlock_t *ptl;
 202        struct page *page;
 203
 204        page = __xip_sparse_page;
 205        if (!page)
 206                return;
 207
 208        spin_lock(&mapping->i_mmap_lock);
 209        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 210                mm = vma->vm_mm;
 211                address = vma->vm_start +
 212                        ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 213                BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 214                pte = page_check_address(page, mm, address, &ptl);
 215                if (pte) {
 216                        /* Nuke the page table entry. */
 217                        flush_cache_page(vma, address, pte_pfn(*pte));
 218                        pteval = ptep_clear_flush(vma, address, pte);
 219                        page_remove_rmap(page, vma);
 220                        dec_mm_counter(mm, file_rss);
 221                        BUG_ON(pte_dirty(pteval));
 222                        pte_unmap_unlock(pte, ptl);
 223                        page_cache_release(page);
 224                }
 225        }
 226        spin_unlock(&mapping->i_mmap_lock);
 227}
 228
 229/*
 230 * xip_nopage() is invoked via the vma operations vector for a
 231 * mapped memory region to read in file data during a page fault.
 232 *
 233 * This function is derived from filemap_nopage, but used for execute in place
 234 */
 235static struct page *
 236xip_file_nopage(struct vm_area_struct * area,
 237                   unsigned long address,
 238                   int *type)
 239{
 240        struct file *file = area->vm_file;
 241        struct address_space *mapping = file->f_mapping;
 242        struct inode *inode = mapping->host;
 243        struct page *page;
 244        unsigned long size, pgoff, endoff;
 245
 246        pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
 247                + area->vm_pgoff;
 248        endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
 249                + area->vm_pgoff;
 250
 251        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 252        if (pgoff >= size)
 253                return NOPAGE_SIGBUS;
 254
 255        page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
 256        if (!IS_ERR(page))
 257                goto out;
 258        if (PTR_ERR(page) != -ENODATA)
 259                return NOPAGE_SIGBUS;
 260
 261        /* sparse block */
 262        if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
 263            (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
 264            (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
 265                /* maybe shared writable, allocate new block */
 266                page = mapping->a_ops->get_xip_page (mapping,
 267                        pgoff*(PAGE_SIZE/512), 1);
 268                if (IS_ERR(page))
 269                        return NOPAGE_SIGBUS;
 270                /* unmap page at pgoff from all other vmas */
 271                __xip_unmap(mapping, pgoff);
 272        } else {
 273                /* not shared and writable, use xip_sparse_page() */
 274                page = xip_sparse_page();
 275                if (!page)
 276                        return NOPAGE_OOM;
 277        }
 278
 279out:
 280        page_cache_get(page);
 281        return page;
 282}
 283
 284static struct vm_operations_struct xip_file_vm_ops = {
 285        .nopage         = xip_file_nopage,
 286};
 287
 288int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
 289{
 290        BUG_ON(!file->f_mapping->a_ops->get_xip_page);
 291
 292        file_accessed(file);
 293        vma->vm_ops = &xip_file_vm_ops;
 294        return 0;
 295}
 296EXPORT_SYMBOL_GPL(xip_file_mmap);
 297
 298static ssize_t
 299__xip_file_write(struct file *filp, const char __user *buf,
 300                  size_t count, loff_t pos, loff_t *ppos)
 301{
 302        struct address_space * mapping = filp->f_mapping;
 303        const struct address_space_operations *a_ops = mapping->a_ops;
 304        struct inode    *inode = mapping->host;
 305        long            status = 0;
 306        struct page     *page;
 307        size_t          bytes;
 308        ssize_t         written = 0;
 309
 310        BUG_ON(!mapping->a_ops->get_xip_page);
 311
 312        do {
 313                unsigned long index;
 314                unsigned long offset;
 315                size_t copied;
 316
 317                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 318                index = pos >> PAGE_CACHE_SHIFT;
 319                bytes = PAGE_CACHE_SIZE - offset;
 320                if (bytes > count)
 321                        bytes = count;
 322
 323                /*
 324                 * Bring in the user page that we will copy from _first_.
 325                 * Otherwise there's a nasty deadlock on copying from the
 326                 * same page as we're writing to, without it being marked
 327                 * up-to-date.
 328                 */
 329                fault_in_pages_readable(buf, bytes);
 330
 331                page = a_ops->get_xip_page(mapping,
 332                                           index*(PAGE_SIZE/512), 0);
 333                if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
 334                        /* we allocate a new page unmap it */
 335                        page = a_ops->get_xip_page(mapping,
 336                                                   index*(PAGE_SIZE/512), 1);
 337                        if (!IS_ERR(page))
 338                                /* unmap page at pgoff from all other vmas */
 339                                __xip_unmap(mapping, index);
 340                }
 341
 342                if (IS_ERR(page)) {
 343                        status = PTR_ERR(page);
 344                        break;
 345                }
 346
 347                copied = filemap_copy_from_user(page, offset, buf, bytes);
 348                flush_dcache_page(page);
 349                if (likely(copied > 0)) {
 350                        status = copied;
 351
 352                        if (status >= 0) {
 353                                written += status;
 354                                count -= status;
 355                                pos += status;
 356                                buf += status;
 357                        }
 358                }
 359                if (unlikely(copied != bytes))
 360                        if (status >= 0)
 361                                status = -EFAULT;
 362                if (status < 0)
 363                        break;
 364        } while (count);
 365        *ppos = pos;
 366        /*
 367         * No need to use i_size_read() here, the i_size
 368         * cannot change under us because we hold i_mutex.
 369         */
 370        if (pos > inode->i_size) {
 371                i_size_write(inode, pos);
 372                mark_inode_dirty(inode);
 373        }
 374
 375        return written ? written : status;
 376}
 377
 378ssize_t
 379xip_file_write(struct file *filp, const char __user *buf, size_t len,
 380               loff_t *ppos)
 381{
 382        struct address_space *mapping = filp->f_mapping;
 383        struct inode *inode = mapping->host;
 384        size_t count;
 385        loff_t pos;
 386        ssize_t ret;
 387
 388        mutex_lock(&inode->i_mutex);
 389
 390        if (!access_ok(VERIFY_READ, buf, len)) {
 391                ret=-EFAULT;
 392                goto out_up;
 393        }
 394
 395        pos = *ppos;
 396        count = len;
 397
 398        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 399
 400        /* We can write back this queue in page reclaim */
 401        current->backing_dev_info = mapping->backing_dev_info;
 402
 403        ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
 404        if (ret)
 405                goto out_backing;
 406        if (count == 0)
 407                goto out_backing;
 408
 409        ret = remove_suid(filp->f_path.dentry);
 410        if (ret)
 411                goto out_backing;
 412
 413        file_update_time(filp);
 414
 415        ret = __xip_file_write (filp, buf, count, pos, ppos);
 416
 417 out_backing:
 418        current->backing_dev_info = NULL;
 419 out_up:
 420        mutex_unlock(&inode->i_mutex);
 421        return ret;
 422}
 423EXPORT_SYMBOL_GPL(xip_file_write);
 424
 425/*
 426 * truncate a page used for execute in place
 427 * functionality is analog to block_truncate_page but does use get_xip_page
 428 * to get the page instead of page cache
 429 */
 430int
 431xip_truncate_page(struct address_space *mapping, loff_t from)
 432{
 433        pgoff_t index = from >> PAGE_CACHE_SHIFT;
 434        unsigned offset = from & (PAGE_CACHE_SIZE-1);
 435        unsigned blocksize;
 436        unsigned length;
 437        struct page *page;
 438
 439        BUG_ON(!mapping->a_ops->get_xip_page);
 440
 441        blocksize = 1 << mapping->host->i_blkbits;
 442        length = offset & (blocksize - 1);
 443
 444        /* Block boundary? Nothing to do */
 445        if (!length)
 446                return 0;
 447
 448        length = blocksize - length;
 449
 450        page = mapping->a_ops->get_xip_page(mapping,
 451                                            index*(PAGE_SIZE/512), 0);
 452        if (!page)
 453                return -ENOMEM;
 454        if (unlikely(IS_ERR(page))) {
 455                if (PTR_ERR(page) == -ENODATA)
 456                        /* Hole? No need to truncate */
 457                        return 0;
 458                else
 459                        return PTR_ERR(page);
 460        }
 461        zero_user_page(page, offset, length, KM_USER0);
 462        return 0;
 463}
 464EXPORT_SYMBOL_GPL(xip_truncate_page);
 465
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.