linux-old/mm/shmem.c
<<
>>
Prefs
   1/*
   2 * Resizable virtual memory filesystem for Linux.
   3 *
   4 * Copyright (C) 2000 Linus Torvalds.
   5 *               2000 Transmeta Corp.
   6 *               2000-2001 Christoph Rohland
   7 *               2000-2001 SAP AG
   8 *               2002 Red Hat Inc.
   9 * Copyright (C) 2002-2003 Hugh Dickins.
  10 * Copyright (C) 2002-2003 VERITAS Software Corporation.
  11 *
  12 * This file is released under the GPL.
  13 */
  14
  15/*
  16 * This virtual memory filesystem is heavily based on the ramfs. It
  17 * extends ramfs by the ability to use swap and honor resource limits
  18 * which makes it a completely usable filesystem.
  19 */
  20
  21#include <linux/config.h>
  22#include <linux/module.h>
  23#include <linux/init.h>
  24#include <linux/devfs_fs_kernel.h>
  25#include <linux/fs.h>
  26#include <linux/mm.h>
  27#include <linux/file.h>
  28#include <linux/swap.h>
  29#include <linux/pagemap.h>
  30#include <linux/string.h>
  31#include <linux/locks.h>
  32#include <linux/smp_lock.h>
  33
  34#include <asm/uaccess.h>
  35#include <asm/div64.h>
  36
  37/* This magic number is used in glibc for posix shared memory */
  38#define TMPFS_MAGIC     0x01021994
  39
  40#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
  41#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
  42#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
  43
  44#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
  45#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
  46
  47#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
  48
  49/* Pretend that each entry is of this size in directory's i_size */
  50#define BOGO_DIRENT_SIZE 20
  51
  52#define SHMEM_SB(sb) (&sb->u.shmem_sb)
  53
  54/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
  55enum sgp_type {
  56        SGP_READ,       /* don't exceed i_size, don't allocate page */
  57        SGP_CACHE,      /* don't exceed i_size, may allocate page */
  58        SGP_WRITE,      /* may exceed i_size, may allocate page */
  59};
  60
  61static int shmem_getpage(struct inode *inode, unsigned long idx,
  62                         struct page **pagep, enum sgp_type sgp);
  63
  64static struct super_operations shmem_ops;
  65static struct address_space_operations shmem_aops;
  66static struct file_operations shmem_file_operations;
  67static struct inode_operations shmem_inode_operations;
  68static struct inode_operations shmem_dir_inode_operations;
  69static struct vm_operations_struct shmem_vm_ops;
  70
  71LIST_HEAD(shmem_inodes);
  72static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
  73
  74static void shmem_free_block(struct inode *inode)
  75{
  76        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  77        spin_lock(&sbinfo->stat_lock);
  78        sbinfo->free_blocks++;
  79        inode->i_blocks -= BLOCKS_PER_PAGE;
  80        spin_unlock(&sbinfo->stat_lock);
  81}
  82
  83static void shmem_removepage(struct page *page)
  84{
  85        if (!PageLaunder(page))
  86                shmem_free_block(page->mapping->host);
  87}
  88
  89/*
  90 * shmem_swp_entry - find the swap vector position in the info structure
  91 *
  92 * @info:  info structure for the inode
  93 * @index: index of the page to find
  94 * @page:  optional page to add to the structure. Has to be preset to
  95 *         all zeros
  96 *
  97 * If there is no space allocated yet it will return NULL when
  98 * page is 0, else it will use the page for the needed block,
  99 * setting it to 0 on return to indicate that it has been used.
 100 *
 101 * The swap vector is organized the following way:
 102 *
 103 * There are SHMEM_NR_DIRECT entries directly stored in the
 104 * shmem_inode_info structure. So small files do not need an addional
 105 * allocation.
 106 *
 107 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 108 * i_indirect which points to a page which holds in the first half
 109 * doubly indirect blocks, in the second half triple indirect blocks:
 110 *
 111 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 112 * following layout (for SHMEM_NR_DIRECT == 16):
 113 *
 114 * i_indirect -> dir --> 16-19
 115 *            |      +-> 20-23
 116 *            |
 117 *            +-->dir2 --> 24-27
 118 *            |        +-> 28-31
 119 *            |        +-> 32-35
 120 *            |        +-> 36-39
 121 *            |
 122 *            +-->dir3 --> 40-43
 123 *                     +-> 44-47
 124 *                     +-> 48-51
 125 *                     +-> 52-55
 126 */
 127static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
 128{
 129        unsigned long offset;
 130        void **dir;
 131
 132        if (index < SHMEM_NR_DIRECT)
 133                return info->i_direct+index;
 134        if (!info->i_indirect) {
 135                if (page) {
 136                        info->i_indirect = (void **) *page;
 137                        *page = 0;
 138                }
 139                return NULL;                    /* need another page */
 140        }
 141
 142        index -= SHMEM_NR_DIRECT;
 143        offset = index % ENTRIES_PER_PAGE;
 144        index /= ENTRIES_PER_PAGE;
 145        dir = info->i_indirect;
 146
 147        if (index >= ENTRIES_PER_PAGE/2) {
 148                index -= ENTRIES_PER_PAGE/2;
 149                dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
 150                index %= ENTRIES_PER_PAGE;
 151                if (!*dir) {
 152                        if (page) {
 153                                *dir = (void *) *page;
 154                                *page = 0;
 155                        }
 156                        return NULL;            /* need another page */
 157                }
 158                dir = (void **) *dir;
 159        }
 160
 161        dir += index;
 162        if (!*dir) {
 163                if (!page || !*page)
 164                        return NULL;            /* need a page */
 165                *dir = (void *) *page;
 166                *page = 0;
 167        }
 168        return (swp_entry_t *) *dir + offset;
 169}
 170
 171/*
 172 * shmem_swp_alloc - get the position of the swap entry for the page.
 173 *                   If it does not exist allocate the entry.
 174 *
 175 * @info:       info structure for the inode
 176 * @index:      index of the page to find
 177 * @sgp:        check and recheck i_size? skip allocation?
 178 */
 179static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
 180{
 181        struct inode *inode = info->inode;
 182        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 183        unsigned long page = 0;
 184        swp_entry_t *entry;
 185        static const swp_entry_t unswapped = {0};
 186
 187        if (sgp != SGP_WRITE &&
 188            ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
 189                return ERR_PTR(-EINVAL);
 190
 191        while (!(entry = shmem_swp_entry(info, index, &page))) {
 192                if (sgp == SGP_READ)
 193                        return (swp_entry_t *) &unswapped;
 194                /*
 195                 * Test free_blocks against 1 not 0, since we have 1 data
 196                 * page (and perhaps indirect index pages) yet to allocate:
 197                 * a waste to allocate index if we cannot allocate data.
 198                 */
 199                spin_lock(&sbinfo->stat_lock);
 200                if (sbinfo->free_blocks <= 1) {
 201                        spin_unlock(&sbinfo->stat_lock);
 202                        return ERR_PTR(-ENOSPC);
 203                }
 204                sbinfo->free_blocks--;
 205                inode->i_blocks += BLOCKS_PER_PAGE;
 206                spin_unlock(&sbinfo->stat_lock);
 207
 208                spin_unlock(&info->lock);
 209                page = get_zeroed_page(GFP_USER);
 210                spin_lock(&info->lock);
 211
 212                if (!page) {
 213                        shmem_free_block(inode);
 214                        return ERR_PTR(-ENOMEM);
 215                }
 216                if (sgp != SGP_WRITE &&
 217                    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
 218                        entry = ERR_PTR(-EINVAL);
 219                        break;
 220                }
 221                if (info->next_index <= index)
 222                        info->next_index = index + 1;
 223        }
 224        if (page) {
 225                /* another task gave its page, or truncated the file */
 226                shmem_free_block(inode);
 227                free_page(page);
 228        }
 229        if (info->next_index <= index && !IS_ERR(entry))
 230                info->next_index = index + 1;
 231        return entry;
 232}
 233
 234/*
 235 * shmem_free_swp - free some swap entries in a directory
 236 *
 237 * @dir:   pointer to the directory
 238 * @edir:  pointer after last entry of the directory
 239 */
 240static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
 241{
 242        swp_entry_t *ptr;
 243        int freed = 0;
 244
 245        for (ptr = dir; ptr < edir; ptr++) {
 246                if (ptr->val) {
 247                        free_swap_and_cache(*ptr);
 248                        *ptr = (swp_entry_t){0};
 249                        freed++;
 250                }
 251        }
 252        return freed;
 253}
 254
 255/*
 256 * shmem_truncate_direct - free the swap entries of a whole doubly
 257 *                         indirect block
 258 *
 259 * @info:       the info structure of the inode
 260 * @dir:        pointer to the pointer to the block
 261 * @start:      offset to start from (in pages)
 262 * @len:        how many pages are stored in this block
 263 */
 264static inline unsigned long
 265shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
 266{
 267        swp_entry_t **last, **ptr;
 268        unsigned long off, freed_swp, freed = 0;
 269
 270        last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
 271        off = start % ENTRIES_PER_PAGE;
 272
 273        for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
 274                if (!*ptr)
 275                        continue;
 276
 277                if (info->swapped) {
 278                        freed_swp = shmem_free_swp(*ptr + off,
 279                                                *ptr + ENTRIES_PER_PAGE);
 280                        info->swapped -= freed_swp;
 281                        freed += freed_swp;
 282                }
 283
 284                if (!off) {
 285                        freed++;
 286                        free_page((unsigned long) *ptr);
 287                        *ptr = 0;
 288                }
 289        }
 290
 291        if (!start) {
 292                freed++;
 293                free_page((unsigned long) *dir);
 294                *dir = 0;
 295        }
 296        return freed;
 297}
 298
 299/*
 300 * shmem_truncate_indirect - truncate an inode
 301 *
 302 * @info:  the info structure of the inode
 303 * @index: the index to truncate
 304 *
 305 * This function locates the last doubly indirect block and calls
 306 * then shmem_truncate_direct to do the real work
 307 */
 308static inline unsigned long
 309shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
 310{
 311        swp_entry_t ***base;
 312        unsigned long baseidx, start;
 313        unsigned long len = info->next_index;
 314        unsigned long freed;
 315
 316        if (len <= SHMEM_NR_DIRECT) {
 317                info->next_index = index;
 318                if (!info->swapped)
 319                        return 0;
 320                freed = shmem_free_swp(info->i_direct + index,
 321                                        info->i_direct + len);
 322                info->swapped -= freed;
 323                return freed;
 324        }
 325
 326        if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
 327                len -= SHMEM_NR_DIRECT;
 328                base = (swp_entry_t ***) &info->i_indirect;
 329                baseidx = SHMEM_NR_DIRECT;
 330        } else {
 331                len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
 332                BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
 333                baseidx = len - 1;
 334                baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
 335                base = (swp_entry_t ***) info->i_indirect +
 336                        ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
 337                len -= baseidx;
 338                baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
 339        }
 340
 341        if (index > baseidx) {
 342                info->next_index = index;
 343                start = index - baseidx;
 344        } else {
 345                info->next_index = baseidx;
 346                start = 0;
 347        }
 348        return *base? shmem_truncate_direct(info, base, start, len): 0;
 349}
 350
 351static void shmem_truncate(struct inode *inode)
 352{
 353        struct shmem_inode_info *info = SHMEM_I(inode);
 354        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 355        unsigned long freed = 0;
 356        unsigned long index;
 357
 358        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 359        index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 360        if (index >= info->next_index)
 361                return;
 362
 363        spin_lock(&info->lock);
 364        while (index < info->next_index)
 365                freed += shmem_truncate_indirect(info, index);
 366        BUG_ON(info->swapped > info->next_index);
 367        spin_unlock(&info->lock);
 368
 369        spin_lock(&sbinfo->stat_lock);
 370        sbinfo->free_blocks += freed;
 371        inode->i_blocks -= freed*BLOCKS_PER_PAGE;
 372        spin_unlock(&sbinfo->stat_lock);
 373}
 374
 375static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 376{
 377        struct inode *inode = dentry->d_inode;
 378        struct page *page = NULL;
 379        int error;
 380
 381        if (attr->ia_valid & ATTR_SIZE) {
 382                if (attr->ia_size < inode->i_size) {
 383                        /*
 384                         * If truncating down to a partial page, then
 385                         * if that page is already allocated, hold it
 386                         * in memory until the truncation is over, so
 387                         * truncate_partial_page cannnot miss it were
 388                         * it assigned to swap.
 389                         */
 390                        if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
 391                                (void) shmem_getpage(inode,
 392                                        attr->ia_size>>PAGE_CACHE_SHIFT,
 393                                                &page, SGP_READ);
 394                        }
 395                }
 396        }
 397
 398        error = inode_change_ok(inode, attr);
 399        if (!error)
 400                error = inode_setattr(inode, attr);
 401        if (page)
 402                page_cache_release(page);
 403        return error;
 404}
 405
 406static void shmem_delete_inode(struct inode *inode)
 407{
 408        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 409        struct shmem_inode_info *info = SHMEM_I(inode);
 410
 411        if (inode->i_op->truncate == shmem_truncate) {
 412                spin_lock(&shmem_ilock);
 413                list_del(&info->list);
 414                spin_unlock(&shmem_ilock);
 415                inode->i_size = 0;
 416                shmem_truncate(inode);
 417        }
 418        BUG_ON(inode->i_blocks);
 419        spin_lock(&sbinfo->stat_lock);
 420        sbinfo->free_inodes++;
 421        spin_unlock(&sbinfo->stat_lock);
 422        clear_inode(inode);
 423}
 424
 425static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
 426{
 427        swp_entry_t *ptr;
 428
 429        for (ptr = dir; ptr < edir; ptr++) {
 430                if (ptr->val == entry.val)
 431                        return ptr - dir;
 432        }
 433        return -1;
 434}
 435
 436static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 437{
 438        struct inode *inode;
 439        struct address_space *mapping;
 440        swp_entry_t *ptr;
 441        unsigned long idx;
 442        unsigned long limit;
 443        int offset;
 444
 445        idx = 0;
 446        ptr = info->i_direct;
 447        spin_lock(&info->lock);
 448        offset = info->next_index;
 449        if (offset > SHMEM_NR_DIRECT)
 450                offset = SHMEM_NR_DIRECT;
 451        offset = shmem_find_swp(entry, ptr, ptr + offset);
 452        if (offset >= 0)
 453                goto found;
 454
 455        for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
 456             idx += ENTRIES_PER_PAGE) {
 457                ptr = shmem_swp_entry(info, idx, NULL);
 458                if (!ptr)
 459                        continue;
 460                offset = info->next_index - idx;
 461                if (offset > ENTRIES_PER_PAGE)
 462                        offset = ENTRIES_PER_PAGE;
 463                offset = shmem_find_swp(entry, ptr, ptr + offset);
 464                if (offset >= 0)
 465                        goto found;
 466        }
 467        spin_unlock(&info->lock);
 468        return 0;
 469found:
 470        idx += offset;
 471        inode = info->inode;
 472        mapping = inode->i_mapping;
 473        delete_from_swap_cache(page);
 474
 475        /* Racing against delete or truncate? Must leave out of page cache */
 476        limit = (inode->i_state & I_FREEING)? 0:
 477                (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 478
 479        if (idx >= limit || add_to_page_cache_unique(page,
 480                        mapping, idx, page_hash(mapping, idx)) == 0) {
 481                ptr[offset].val = 0;
 482                info->swapped--;
 483        } else if (add_to_swap_cache(page, entry) != 0)
 484                BUG();
 485        spin_unlock(&info->lock);
 486        SetPageUptodate(page);
 487        /*
 488         * Decrement swap count even when the entry is left behind:
 489         * try_to_unuse will skip over mms, then reincrement count.
 490         */
 491        swap_free(entry);
 492        return 1;
 493}
 494
 495/*
 496 * shmem_unuse() search for an eventually swapped out shmem page.
 497 */
 498int shmem_unuse(swp_entry_t entry, struct page *page)
 499{
 500        struct list_head *p;
 501        struct shmem_inode_info *info;
 502        int found = 0;
 503
 504        spin_lock(&shmem_ilock);
 505        list_for_each(p, &shmem_inodes) {
 506                info = list_entry(p, struct shmem_inode_info, list);
 507
 508                if (info->swapped && shmem_unuse_inode(info, entry, page)) {
 509                        /* move head to start search for next from here */
 510                        list_move_tail(&shmem_inodes, &info->list);
 511                        found = 1;
 512                        break;
 513                }
 514        }
 515        spin_unlock(&shmem_ilock);
 516        return found;
 517}
 518
 519/*
 520 * Move the page from the page cache to the swap cache.
 521 */
 522static int shmem_writepage(struct page *page)
 523{
 524        struct shmem_inode_info *info;
 525        swp_entry_t *entry, swap;
 526        struct address_space *mapping;
 527        unsigned long index;
 528        struct inode *inode;
 529
 530        BUG_ON(!PageLocked(page));
 531        if (!PageLaunder(page))
 532                return fail_writepage(page);
 533
 534        mapping = page->mapping;
 535        index = page->index;
 536        inode = mapping->host;
 537        info = SHMEM_I(inode);
 538        if (info->flags & VM_LOCKED)
 539                return fail_writepage(page);
 540getswap:
 541        swap = get_swap_page();
 542        if (!swap.val)
 543                return fail_writepage(page);
 544
 545        spin_lock(&info->lock);
 546        BUG_ON(index >= info->next_index);
 547        entry = shmem_swp_entry(info, index, NULL);
 548        BUG_ON(!entry);
 549        BUG_ON(entry->val);
 550
 551        /* Remove it from the page cache */
 552        remove_inode_page(page);
 553        page_cache_release(page);
 554
 555        /* Add it to the swap cache */
 556        if (add_to_swap_cache(page, swap) != 0) {
 557                /*
 558                 * Raced with "speculative" read_swap_cache_async.
 559                 * Add page back to page cache, unref swap, try again.
 560                 */
 561                add_to_page_cache_locked(page, mapping, index);
 562                spin_unlock(&info->lock);
 563                swap_free(swap);
 564                goto getswap;
 565        }
 566
 567        *entry = swap;
 568        info->swapped++;
 569        spin_unlock(&info->lock);
 570        SetPageUptodate(page);
 571        set_page_dirty(page);
 572        UnlockPage(page);
 573        return 0;
 574}
 575
 576/*
 577 * shmem_getpage - either get the page from swap or allocate a new one
 578 *
 579 * If we allocate a new one we do not mark it dirty. That's up to the
 580 * vm. If we swap it in we mark it dirty since we also free the swap
 581 * entry since a page cannot live in both the swap and page cache
 582 */
 583static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
 584{
 585        struct address_space *mapping = inode->i_mapping;
 586        struct shmem_inode_info *info = SHMEM_I(inode);
 587        struct shmem_sb_info *sbinfo;
 588        struct page *filepage = *pagep;
 589        struct page *swappage;
 590        swp_entry_t *entry;
 591        swp_entry_t swap;
 592        int error = 0;
 593
 594        if (idx >= SHMEM_MAX_INDEX)
 595                return -EFBIG;
 596        /*
 597         * Normally, filepage is NULL on entry, and either found
 598         * uptodate immediately, or allocated and zeroed, or read
 599         * in under swappage, which is then assigned to filepage.
 600         * But shmem_readpage and shmem_prepare_write pass in a locked
 601         * filepage, which may be found not uptodate by other callers
 602         * too, and may need to be copied from the swappage read in.
 603         */
 604repeat:
 605        if (!filepage)
 606                filepage = find_lock_page(mapping, idx);
 607        if (filepage && Page_Uptodate(filepage))
 608                goto done;
 609
 610        spin_lock(&info->lock);
 611        entry = shmem_swp_alloc(info, idx, sgp);
 612        if (IS_ERR(entry)) {
 613                spin_unlock(&info->lock);
 614                error = PTR_ERR(entry);
 615                goto failed;
 616        }
 617        swap = *entry;
 618
 619        if (swap.val) {
 620                /* Look it up and read it in.. */
 621                swappage = lookup_swap_cache(swap);
 622                if (!swappage) {
 623                        spin_unlock(&info->lock);
 624                        swapin_readahead(swap);
 625                        swappage = read_swap_cache_async(swap);
 626                        if (!swappage) {
 627                                spin_lock(&info->lock);
 628                                entry = shmem_swp_alloc(info, idx, sgp);
 629                                if (IS_ERR(entry))
 630                                        error = PTR_ERR(entry);
 631                                else if (entry->val == swap.val)
 632                                        error = -ENOMEM;
 633                                spin_unlock(&info->lock);
 634                                if (error)
 635                                        goto failed;
 636                                goto repeat;
 637                        }
 638                        wait_on_page(swappage);
 639                        page_cache_release(swappage);
 640                        goto repeat;
 641                }
 642
 643                /* We have to do this with page locked to prevent races */
 644                if (TryLockPage(swappage)) {
 645                        spin_unlock(&info->lock);
 646                        wait_on_page(swappage);
 647                        page_cache_release(swappage);
 648                        goto repeat;
 649                }
 650                if (!Page_Uptodate(swappage)) {
 651                        spin_unlock(&info->lock);
 652                        UnlockPage(swappage);
 653                        page_cache_release(swappage);
 654                        error = -EIO;
 655                        goto failed;
 656                }
 657
 658                delete_from_swap_cache(swappage);
 659                if (filepage) {
 660                        entry->val = 0;
 661                        info->swapped--;
 662                        spin_unlock(&info->lock);
 663                        flush_page_to_ram(swappage);
 664                        copy_highpage(filepage, swappage);
 665                        UnlockPage(swappage);
 666                        page_cache_release(swappage);
 667                        flush_dcache_page(filepage);
 668                        SetPageUptodate(filepage);
 669                        SetPageDirty(filepage);
 670                        swap_free(swap);
 671                } else if (add_to_page_cache_unique(swappage,
 672                        mapping, idx, page_hash(mapping, idx)) == 0) {
 673                        entry->val = 0;
 674                        info->swapped--;
 675                        spin_unlock(&info->lock);
 676                        filepage = swappage;
 677                        SetPageUptodate(filepage);
 678                        SetPageDirty(filepage);
 679                        swap_free(swap);
 680                } else {
 681                        if (add_to_swap_cache(swappage, swap) != 0)
 682                                BUG();
 683                        spin_unlock(&info->lock);
 684                        SetPageUptodate(swappage);
 685                        SetPageDirty(swappage);
 686                        UnlockPage(swappage);
 687                        page_cache_release(swappage);
 688                        goto repeat;
 689                }
 690        } else if (sgp == SGP_READ && !filepage) {
 691                filepage = find_get_page(mapping, idx);
 692                if (filepage &&
 693                    (!Page_Uptodate(filepage) || TryLockPage(filepage))) {
 694                        spin_unlock(&info->lock);
 695                        wait_on_page(filepage);
 696                        page_cache_release(filepage);
 697                        filepage = NULL;
 698                        goto repeat;
 699                }
 700                spin_unlock(&info->lock);
 701        } else {
 702                sbinfo = SHMEM_SB(inode->i_sb);
 703                spin_lock(&sbinfo->stat_lock);
 704                if (sbinfo->free_blocks == 0) {
 705                        spin_unlock(&sbinfo->stat_lock);
 706                        spin_unlock(&info->lock);
 707                        error = -ENOSPC;
 708                        goto failed;
 709                }
 710                sbinfo->free_blocks--;
 711                inode->i_blocks += BLOCKS_PER_PAGE;
 712                spin_unlock(&sbinfo->stat_lock);
 713
 714                if (!filepage) {
 715                        spin_unlock(&info->lock);
 716                        filepage = page_cache_alloc(mapping);
 717                        if (!filepage) {
 718                                shmem_free_block(inode);
 719                                error = -ENOMEM;
 720                                goto failed;
 721                        }
 722
 723                        spin_lock(&info->lock);
 724                        entry = shmem_swp_alloc(info, idx, sgp);
 725                        if (IS_ERR(entry))
 726                                error = PTR_ERR(entry);
 727                        if (error || entry->val ||
 728                            add_to_page_cache_unique(filepage,
 729                            mapping, idx, page_hash(mapping, idx)) != 0) {
 730                                spin_unlock(&info->lock);
 731                                page_cache_release(filepage);
 732                                shmem_free_block(inode);
 733                                filepage = NULL;
 734                                if (error)
 735                                        goto failed;
 736                                goto repeat;
 737                        }
 738                }
 739
 740                spin_unlock(&info->lock);
 741                clear_highpage(filepage);
 742                flush_dcache_page(filepage);
 743                SetPageUptodate(filepage);
 744        }
 745done:
 746        if (!*pagep) {
 747                if (filepage) {
 748                        UnlockPage(filepage);
 749                        *pagep = filepage;
 750                } else
 751                        *pagep = ZERO_PAGE(0);
 752        }
 753        return 0;
 754
 755failed:
 756        if (*pagep != filepage) {
 757                UnlockPage(filepage);
 758                page_cache_release(filepage);
 759        }
 760        return error;
 761}
 762
 763struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
 764{
 765        struct inode *inode = vma->vm_file->f_dentry->d_inode;
 766        struct page *page = NULL;
 767        unsigned long idx;
 768        int error;
 769
 770        idx = (address - vma->vm_start) >> PAGE_SHIFT;
 771        idx += vma->vm_pgoff;
 772        idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 773
 774        error = shmem_getpage(inode, idx, &page, SGP_CACHE);
 775        if (error)
 776                return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
 777
 778        mark_page_accessed(page);
 779        flush_page_to_ram(page);
 780        return page;
 781}
 782
 783void shmem_lock(struct file *file, int lock)
 784{
 785        struct inode *inode = file->f_dentry->d_inode;
 786        struct shmem_inode_info *info = SHMEM_I(inode);
 787
 788        spin_lock(&info->lock);
 789        if (lock)
 790                info->flags |= VM_LOCKED;
 791        else
 792                info->flags &= ~VM_LOCKED;
 793        spin_unlock(&info->lock);
 794}
 795
 796static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 797{
 798        struct vm_operations_struct *ops;
 799        struct inode *inode = file->f_dentry->d_inode;
 800
 801        ops = &shmem_vm_ops;
 802        if (!S_ISREG(inode->i_mode))
 803                return -EACCES;
 804        UPDATE_ATIME(inode);
 805        vma->vm_ops = ops;
 806        return 0;
 807}
 808
 809static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 810{
 811        struct inode *inode;
 812        struct shmem_inode_info *info;
 813        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 814
 815        spin_lock(&sbinfo->stat_lock);
 816        if (!sbinfo->free_inodes) {
 817                spin_unlock(&sbinfo->stat_lock);
 818                return NULL;
 819        }
 820        sbinfo->free_inodes--;
 821        spin_unlock(&sbinfo->stat_lock);
 822
 823        inode = new_inode(sb);
 824        if (inode) {
 825                inode->i_mode = mode;
 826                inode->i_uid = current->fsuid;
 827                inode->i_gid = current->fsgid;
 828                inode->i_blksize = PAGE_CACHE_SIZE;
 829                inode->i_blocks = 0;
 830                inode->i_rdev = NODEV;
 831                inode->i_mapping->a_ops = &shmem_aops;
 832                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 833                info = SHMEM_I(inode);
 834                info->inode = inode;
 835                spin_lock_init(&info->lock);
 836                switch (mode & S_IFMT) {
 837                default:
 838                        init_special_inode(inode, mode, dev);
 839                        break;
 840                case S_IFREG:
 841                        inode->i_op = &shmem_inode_operations;
 842                        inode->i_fop = &shmem_file_operations;
 843                        spin_lock(&shmem_ilock);
 844                        list_add_tail(&info->list, &shmem_inodes);
 845                        spin_unlock(&shmem_ilock);
 846                        break;
 847                case S_IFDIR:
 848                        inode->i_nlink++;
 849                        /* Some things misbehave if size == 0 on a directory */
 850                        inode->i_size = 2 * BOGO_DIRENT_SIZE;
 851                        inode->i_op = &shmem_dir_inode_operations;
 852                        inode->i_fop = &dcache_dir_ops;
 853                        break;
 854                case S_IFLNK:
 855                        break;
 856                }
 857        }
 858        return inode;
 859}
 860
 861static int shmem_set_size(struct shmem_sb_info *info,
 862                          unsigned long max_blocks, unsigned long max_inodes)
 863{
 864        int error;
 865        unsigned long blocks, inodes;
 866
 867        spin_lock(&info->stat_lock);
 868        blocks = info->max_blocks - info->free_blocks;
 869        inodes = info->max_inodes - info->free_inodes;
 870        error = -EINVAL;
 871        if (max_blocks < blocks)
 872                goto out;
 873        if (max_inodes < inodes)
 874                goto out;
 875        error = 0;
 876        info->max_blocks  = max_blocks;
 877        info->free_blocks = max_blocks - blocks;
 878        info->max_inodes  = max_inodes;
 879        info->free_inodes = max_inodes - inodes;
 880out:
 881        spin_unlock(&info->stat_lock);
 882        return error;
 883}
 884
 885#ifdef CONFIG_TMPFS
 886
 887static struct inode_operations shmem_symlink_inode_operations;
 888static struct inode_operations shmem_symlink_inline_operations;
 889
 890/*
 891 * tmpfs itself makes no use of generic_file_read, generic_file_mmap
 892 * or generic_file_write; but shmem_readpage, shmem_prepare_write and
 893 * shmem_commit_write let a tmpfs file be used below the loop driver,
 894 * and shmem_readpage lets a tmpfs file be used by sendfile.
 895 */
 896static int
 897shmem_readpage(struct file *file, struct page *page)
 898{
 899        struct inode *inode = page->mapping->host;
 900        int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
 901        UnlockPage(page);
 902        return error;
 903}
 904
 905static int
 906shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 907{
 908        struct inode *inode = page->mapping->host;
 909        return shmem_getpage(inode, page->index, &page, SGP_WRITE);
 910}
 911
 912static int
 913shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 914{
 915        struct inode *inode = page->mapping->host;
 916        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 917
 918        if (pos > inode->i_size)
 919                inode->i_size = pos;
 920        SetPageDirty(page);
 921        return 0;
 922}
 923
 924static ssize_t
 925shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 926{
 927        struct inode    *inode = file->f_dentry->d_inode;
 928        loff_t          pos;
 929        unsigned long   written;
 930        int             err;
 931
 932        if ((ssize_t) count < 0)
 933                return -EINVAL;
 934
 935        if (!access_ok(VERIFY_READ, buf, count))
 936                return -EFAULT;
 937
 938        down(&inode->i_sem);
 939
 940        pos = *ppos;
 941        written = 0;
 942
 943        err = precheck_file_write(file, inode, &count, &pos);
 944        if (err || !count)
 945                goto out;
 946
 947        remove_suid(inode);
 948        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 949
 950        do {
 951                struct page *page = NULL;
 952                unsigned long bytes, index, offset;
 953                char *kaddr;
 954                int left;
 955
 956                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 957                index = pos >> PAGE_CACHE_SHIFT;
 958                bytes = PAGE_CACHE_SIZE - offset;
 959                if (bytes > count)
 960                        bytes = count;
 961
 962                /*
 963                 * We don't hold page lock across copy from user -
 964                 * what would it guard against? - so no deadlock here.
 965                 */
 966
 967                err = shmem_getpage(inode, index, &page, SGP_WRITE);
 968                if (err)
 969                        break;
 970
 971                kaddr = kmap(page);
 972                left = __copy_from_user(kaddr + offset, buf, bytes);
 973                kunmap(page);
 974
 975                written += bytes;
 976                count -= bytes;
 977                pos += bytes;
 978                buf += bytes;
 979                if (pos > inode->i_size)
 980                        inode->i_size = pos;
 981
 982                flush_dcache_page(page);
 983                SetPageDirty(page);
 984                SetPageReferenced(page);
 985                page_cache_release(page);
 986
 987                if (left) {
 988                        pos -= left;
 989                        written -= left;
 990                        err = -EFAULT;
 991                        break;
 992                }
 993        } while (count);
 994
 995        *ppos = pos;
 996        if (written)
 997                err = written;
 998out:
 999        up(&inode->i_sem);
1000        return err;
1001}
1002
1003static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
1004{
1005        struct inode *inode = filp->f_dentry->d_inode;
1006        struct address_space *mapping = inode->i_mapping;
1007        unsigned long index, offset;
1008
1009        index = *ppos >> PAGE_CACHE_SHIFT;
1010        offset = *ppos & ~PAGE_CACHE_MASK;
1011
1012        for (;;) {
1013                struct page *page = NULL;
1014                unsigned long end_index, nr, ret;
1015
1016                end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1017                if (index > end_index)
1018                        break;
1019                if (index == end_index) {
1020                        nr = inode->i_size & ~PAGE_CACHE_MASK;
1021                        if (nr <= offset)
1022                                break;
1023                }
1024
1025                desc->error = shmem_getpage(inode, index, &page, SGP_READ);
1026                if (desc->error) {
1027                        if (desc->error == -EINVAL)
1028                                desc->error = 0;
1029                        break;
1030                }
1031
1032                /*
1033                 * We must evaluate after, since reads (unlike writes)
1034                 * are called without i_sem protection against truncate
1035                 */
1036                nr = PAGE_CACHE_SIZE;
1037                end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1038                if (index == end_index) {
1039                        nr = inode->i_size & ~PAGE_CACHE_MASK;
1040                        if (nr <= offset) {
1041                                page_cache_release(page);
1042                                break;
1043                        }
1044                }
1045                nr -= offset;
1046
1047                if (page != ZERO_PAGE(0)) {
1048                        /*
1049                         * If users can be writing to this page using arbitrary
1050                         * virtual addresses, take care about potential aliasing
1051                         * before reading the page on the kernel side.
1052                         */
1053                        if (mapping->i_mmap_shared != NULL)
1054                                flush_dcache_page(page);
1055                        /*
1056                         * Mark the page accessed if we read the
1057                         * beginning or we just did an lseek.
1058                         */
1059                        if (!offset || !filp->f_reada)
1060                                mark_page_accessed(page);
1061                }
1062
1063                /*
1064                 * Ok, we have the page, and it's up-to-date, so
1065                 * now we can copy it to user space...
1066                 *
1067                 * The actor routine returns how many bytes were actually used..
1068                 * NOTE! This may not be the same as how much of a user buffer
1069                 * we filled up (we may be padding etc), so we can only update
1070                 * "pos" here (the actor routine has to update the user buffer
1071                 * pointers and the remaining count).
1072                 */
1073                ret = file_read_actor(desc, page, offset, nr);
1074                offset += ret;
1075                index += offset >> PAGE_CACHE_SHIFT;
1076                offset &= ~PAGE_CACHE_MASK;
1077
1078                page_cache_release(page);
1079                if (ret != nr || !desc->count)
1080                        break;
1081        }
1082
1083        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1084        filp->f_reada = 1;
1085        UPDATE_ATIME(inode);
1086}
1087
1088static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
1089{
1090        read_descriptor_t desc;
1091
1092        if ((ssize_t) count < 0)
1093                return -EINVAL;
1094        if (!access_ok(VERIFY_WRITE, buf, count))
1095                return -EFAULT;
1096        if (!count)
1097                return 0;
1098
1099        desc.written = 0;
1100        desc.count = count;
1101        desc.buf = buf;
1102        desc.error = 0;
1103
1104        do_shmem_file_read(filp, ppos, &desc);
1105        if (desc.written)
1106                return desc.written;
1107        return desc.error;
1108}
1109
1110static int shmem_statfs(struct super_block *sb, struct statfs *buf)
1111{
1112        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1113
1114        buf->f_type = TMPFS_MAGIC;
1115        buf->f_bsize = PAGE_CACHE_SIZE;
1116        spin_lock(&sbinfo->stat_lock);
1117        buf->f_blocks = sbinfo->max_blocks;
1118        buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1119        buf->f_files = sbinfo->max_inodes;
1120        buf->f_ffree = sbinfo->free_inodes;
1121        spin_unlock(&sbinfo->stat_lock);
1122        buf->f_namelen = NAME_MAX;
1123        return 0;
1124}
1125
1126/*
1127 * Lookup the data. This is trivial - if the dentry didn't already
1128 * exist, we know it is negative.
1129 */
1130static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry)
1131{
1132        d_add(dentry, NULL);
1133        return NULL;
1134}
1135
1136/*
1137 * File creation. Allocate an inode, and we're done..
1138 */
1139static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1140{
1141        struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1142        int error = -ENOSPC;
1143
1144        if (inode) {
1145                dir->i_size += BOGO_DIRENT_SIZE;
1146                dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1147                d_instantiate(dentry, inode);
1148                dget(dentry); /* Extra count - pin the dentry in core */
1149                error = 0;
1150        }
1151        return error;
1152}
1153
1154static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1155{
1156        int error;
1157
1158        if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1159                return error;
1160        dir->i_nlink++;
1161        return 0;
1162}
1163
1164static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
1165{
1166        return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1167}
1168
1169/*
1170 * Link a file..
1171 */
1172static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1173{
1174        struct inode *inode = old_dentry->d_inode;
1175
1176        if (S_ISDIR(inode->i_mode))
1177                return -EPERM;
1178
1179        dir->i_size += BOGO_DIRENT_SIZE;
1180        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1181        inode->i_nlink++;
1182        atomic_inc(&inode->i_count);    /* New dentry reference */
1183        dget(dentry);           /* Extra pinning count for the created dentry */
1184        d_instantiate(dentry, inode);
1185        return 0;
1186}
1187
1188static inline int shmem_positive(struct dentry *dentry)
1189{
1190        return dentry->d_inode && !d_unhashed(dentry);
1191}
1192
1193/*
1194 * Check that a directory is empty (this works
1195 * for regular files too, they'll just always be
1196 * considered empty..).
1197 *
1198 * Note that an empty directory can still have
1199 * children, they just all have to be negative..
1200 */
1201static int shmem_empty(struct dentry *dentry)
1202{
1203        struct list_head *list;
1204
1205        spin_lock(&dcache_lock);
1206        list = dentry->d_subdirs.next;
1207
1208        while (list != &dentry->d_subdirs) {
1209                struct dentry *de = list_entry(list, struct dentry, d_child);
1210
1211                if (shmem_positive(de)) {
1212                        spin_unlock(&dcache_lock);
1213                        return 0;
1214                }
1215                list = list->next;
1216        }
1217        spin_unlock(&dcache_lock);
1218        return 1;
1219}
1220
1221static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1222{
1223        struct inode *inode = dentry->d_inode;
1224
1225        dir->i_size -= BOGO_DIRENT_SIZE;
1226        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1227        inode->i_nlink--;
1228        dput(dentry);   /* Undo the count from "create" - this does all the work */
1229        return 0;
1230}
1231
1232static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1233{
1234        if (!shmem_empty(dentry))
1235                return -ENOTEMPTY;
1236
1237        dir->i_nlink--;
1238        return shmem_unlink(dir, dentry);
1239}
1240
1241/*
1242 * The VFS layer already does all the dentry stuff for rename,
1243 * we just have to decrement the usage count for the target if
1244 * it exists so that the VFS layer correctly free's it when it
1245 * gets overwritten.
1246 */
1247static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1248{
1249        struct inode *inode = old_dentry->d_inode;
1250        int they_are_dirs = S_ISDIR(inode->i_mode);
1251
1252        if (!shmem_empty(new_dentry))
1253                return -ENOTEMPTY;
1254
1255        if (new_dentry->d_inode) {
1256                (void) shmem_unlink(new_dir, new_dentry);
1257                if (they_are_dirs)
1258                        old_dir->i_nlink--;
1259        } else if (they_are_dirs) {
1260                old_dir->i_nlink--;
1261                new_dir->i_nlink++;
1262        }
1263
1264        old_dir->i_size -= BOGO_DIRENT_SIZE;
1265        new_dir->i_size += BOGO_DIRENT_SIZE;
1266        old_dir->i_ctime = old_dir->i_mtime =
1267        new_dir->i_ctime = new_dir->i_mtime =
1268        inode->i_ctime = CURRENT_TIME;
1269        return 0;
1270}
1271
1272static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1273{
1274        int error;
1275        int len;
1276        struct inode *inode;
1277        struct page *page = NULL;
1278        char *kaddr;
1279        struct shmem_inode_info *info;
1280
1281        len = strlen(symname) + 1;
1282        if (len > PAGE_CACHE_SIZE)
1283                return -ENAMETOOLONG;
1284
1285        inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1286        if (!inode)
1287                return -ENOSPC;
1288
1289        info = SHMEM_I(inode);
1290        inode->i_size = len-1;
1291        if (len <= sizeof(struct shmem_inode_info)) {
1292                /* do it inline */
1293                memcpy(info, symname, len);
1294                inode->i_op = &shmem_symlink_inline_operations;
1295        } else {
1296                error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1297                if (error) {
1298                        iput(inode);
1299                        return error;
1300                }
1301                inode->i_op = &shmem_symlink_inode_operations;
1302                spin_lock(&shmem_ilock);
1303                list_add_tail(&info->list, &shmem_inodes);
1304                spin_unlock(&shmem_ilock);
1305                kaddr = kmap(page);
1306                memcpy(kaddr, symname, len);
1307                kunmap(page);
1308                SetPageDirty(page);
1309                page_cache_release(page);
1310        }
1311        dir->i_size += BOGO_DIRENT_SIZE;
1312        dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1313        d_instantiate(dentry, inode);
1314        dget(dentry);
1315        return 0;
1316}
1317
1318static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
1319{
1320        return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1321}
1322
1323static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1324{
1325        return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1326}
1327
1328static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1329{
1330        struct page *page = NULL;
1331        int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1332        if (res)
1333                return res;
1334        res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1335        kunmap(page);
1336        mark_page_accessed(page);
1337        page_cache_release(page);
1338        return res;
1339}
1340
1341static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1342{
1343        struct page *page = NULL;
1344        int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1345        if (res)
1346                return res;
1347        res = vfs_follow_link(nd, kmap(page));
1348        kunmap(page);
1349        mark_page_accessed(page);
1350        page_cache_release(page);
1351        return res;
1352}
1353
1354static struct inode_operations shmem_symlink_inline_operations = {
1355        readlink:       shmem_readlink_inline,
1356        follow_link:    shmem_follow_link_inline,
1357};
1358
1359static struct inode_operations shmem_symlink_inode_operations = {
1360        truncate:       shmem_truncate,
1361        readlink:       shmem_readlink,
1362        follow_link:    shmem_follow_link,
1363};
1364
1365static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1366{
1367        char *this_char, *value, *rest;
1368
1369        while ((this_char = strsep(&options, ",")) != NULL) {
1370                if (!*this_char)
1371                        continue;
1372                if ((value = strchr(this_char,'=')) != NULL) {
1373                        *value++ = 0;
1374                } else {
1375                        printk(KERN_ERR
1376                            "tmpfs: No value for mount option '%s'\n",
1377                            this_char);
1378                        return 1;
1379                }
1380
1381                if (!strcmp(this_char,"size")) {
1382                        unsigned long long size;
1383                        size = memparse(value,&rest);
1384                        if (*rest == '%') {
1385                                struct sysinfo si;
1386                                si_meminfo(&si);
1387                                size <<= PAGE_SHIFT;
1388                                size *= si.totalram;
1389                                do_div(size, 100);
1390                                rest++;
1391                        }
1392                        if (*rest)
1393                                goto bad_val;
1394                        *blocks = size >> PAGE_CACHE_SHIFT;
1395                } else if (!strcmp(this_char,"nr_blocks")) {
1396                        *blocks = memparse(value,&rest);
1397                        if (*rest)
1398                                goto bad_val;
1399                } else if (!strcmp(this_char,"nr_inodes")) {
1400                        *inodes = memparse(value,&rest);
1401                        if (*rest)
1402                                goto bad_val;
1403                } else if (!strcmp(this_char,"mode")) {
1404                        if (!mode)
1405                                continue;
1406                        *mode = simple_strtoul(value,&rest,8);
1407                        if (*rest)
1408                                goto bad_val;
1409                } else if (!strcmp(this_char,"uid")) {
1410                        if (!uid)
1411                                continue;
1412                        *uid = simple_strtoul(value,&rest,0);
1413                        if (*rest)
1414                                goto bad_val;
1415                } else if (!strcmp(this_char,"gid")) {
1416                        if (!gid)
1417                                continue;
1418                        *gid = simple_strtoul(value,&rest,0);
1419                        if (*rest)
1420                                goto bad_val;
1421                } else {
1422                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1423                               this_char);
1424                        return 1;
1425                }
1426        }
1427        return 0;
1428
1429bad_val:
1430        printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1431               value, this_char);
1432        return 1;
1433}
1434
1435static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1436{
1437        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1438        unsigned long max_blocks = sbinfo->max_blocks;
1439        unsigned long max_inodes = sbinfo->max_inodes;
1440
1441        if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1442                return -EINVAL;
1443        return shmem_set_size(sbinfo, max_blocks, max_inodes);
1444}
1445
/*
 * shmem_sync_file - fsync file operation
 *
 * Does nothing and reports success.
 */
static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
	int ret = 0;

	return ret;
}
1450#endif
1451
1452static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
1453{
1454        struct inode *inode;
1455        struct dentry *root;
1456        unsigned long blocks, inodes;
1457        int mode   = S_IRWXUGO | S_ISVTX;
1458        uid_t uid = current->fsuid;
1459        gid_t gid = current->fsgid;
1460        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1461        struct sysinfo si;
1462
1463        /*
1464         * Per default we only allow half of the physical ram per
1465         * tmpfs instance
1466         */
1467        si_meminfo(&si);
1468        blocks = inodes = si.totalram / 2;
1469
1470#ifdef CONFIG_TMPFS
1471        if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
1472                return NULL;
1473#endif
1474
1475        spin_lock_init(&sbinfo->stat_lock);
1476        sbinfo->max_blocks = blocks;
1477        sbinfo->free_blocks = blocks;
1478        sbinfo->max_inodes = inodes;
1479        sbinfo->free_inodes = inodes;
1480        sb->s_maxbytes = SHMEM_MAX_BYTES;
1481        sb->s_blocksize = PAGE_CACHE_SIZE;
1482        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1483        sb->s_magic = TMPFS_MAGIC;
1484        sb->s_op = &shmem_ops;
1485        inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1486        if (!inode)
1487                return NULL;
1488
1489        inode->i_uid = uid;
1490        inode->i_gid = gid;
1491        root = d_alloc_root(inode);
1492        if (!root) {
1493                iput(inode);
1494                return NULL;
1495        }
1496        sb->s_root = root;
1497        return sb;
1498}
1499
1500static struct address_space_operations shmem_aops = {
1501        removepage:     shmem_removepage,
1502        writepage:      shmem_writepage,
1503#ifdef CONFIG_TMPFS
1504        readpage:       shmem_readpage,
1505        prepare_write:  shmem_prepare_write,
1506        commit_write:   shmem_commit_write,
1507#endif
1508};
1509
1510static struct file_operations shmem_file_operations = {
1511        mmap:           shmem_mmap,
1512#ifdef CONFIG_TMPFS
1513        read:           shmem_file_read,
1514        write:          shmem_file_write,
1515        fsync:          shmem_sync_file,
1516#endif
1517};
1518
1519static struct inode_operations shmem_inode_operations = {
1520        truncate:       shmem_truncate,
1521        setattr:        shmem_notify_change,
1522};
1523
1524static struct inode_operations shmem_dir_inode_operations = {
1525#ifdef CONFIG_TMPFS
1526        create:         shmem_create,
1527        lookup:         shmem_lookup,
1528        link:           shmem_link,
1529        unlink:         shmem_unlink,
1530        symlink:        shmem_symlink,
1531        mkdir:          shmem_mkdir,
1532        rmdir:          shmem_rmdir,
1533        mknod:          shmem_mknod,
1534        rename:         shmem_rename,
1535#endif
1536};
1537
1538static struct super_operations shmem_ops = {
1539#ifdef CONFIG_TMPFS
1540        statfs:         shmem_statfs,
1541        remount_fs:     shmem_remount_fs,
1542#endif
1543        delete_inode:   shmem_delete_inode,
1544        put_inode:      force_delete,
1545};
1546
1547static struct vm_operations_struct shmem_vm_ops = {
1548        nopage:         shmem_nopage,
1549};
1550
1551#ifdef CONFIG_TMPFS
1552/* type "shm" will be tagged obsolete in 2.5 */
1553static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
1554static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
1555#else
1556static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
1557#endif
1558static struct vfsmount *shm_mnt;
1559
1560static int __init init_tmpfs(void)
1561{
1562        int error;
1563
1564        error = register_filesystem(&tmpfs_fs_type);
1565        if (error) {
1566                printk(KERN_ERR "Could not register tmpfs\n");
1567                goto out3;
1568        }
1569#ifdef CONFIG_TMPFS
1570        error = register_filesystem(&shmem_fs_type);
1571        if (error) {
1572                printk(KERN_ERR "Could not register shm fs\n");
1573                goto out2;
1574        }
1575        devfs_mk_dir(NULL, "shm", NULL);
1576#endif
1577        shm_mnt = kern_mount(&tmpfs_fs_type);
1578        if (IS_ERR(shm_mnt)) {
1579                error = PTR_ERR(shm_mnt);
1580                printk(KERN_ERR "Could not kern_mount tmpfs\n");
1581                goto out1;
1582        }
1583
1584        /* The internal instance should not do size checking */
1585        shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
1586        return 0;
1587
1588out1:
1589#ifdef CONFIG_TMPFS
1590        unregister_filesystem(&shmem_fs_type);
1591out2:
1592#endif
1593        unregister_filesystem(&tmpfs_fs_type);
1594out3:
1595        shm_mnt = ERR_PTR(error);
1596        return error;
1597}
1598module_init(init_tmpfs)
1599
1600/*
1601 * shmem_file_setup - get an unlinked file living in tmpfs
1602 *
1603 * @name: name for dentry (to be seen in /proc/<pid>/maps
1604 * @size: size to be set for the file
1605 *
1606 */
1607struct file *shmem_file_setup(char *name, loff_t size)
1608{
1609        int error;
1610        struct file *file;
1611        struct inode *inode;
1612        struct dentry *dentry, *root;
1613        struct qstr this;
1614        int vm_enough_memory(long pages);
1615
1616        if (IS_ERR(shm_mnt))
1617                return (void *)shm_mnt;
1618
1619        if (size > SHMEM_MAX_BYTES)
1620                return ERR_PTR(-EINVAL);
1621
1622        if (!vm_enough_memory(VM_ACCT(size)))
1623                return ERR_PTR(-ENOMEM);
1624
1625        this.name = name;
1626        this.len = strlen(name);
1627        this.hash = 0; /* will go */
1628        root = shm_mnt->mnt_root;
1629        dentry = d_alloc(root, &this);
1630        if (!dentry)
1631                return ERR_PTR(-ENOMEM);
1632
1633        error = -ENFILE;
1634        file = get_empty_filp();
1635        if (!file)
1636                goto put_dentry;
1637
1638        error = -ENOSPC;
1639        inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1640        if (!inode)
1641                goto close_file;
1642
1643        d_instantiate(dentry, inode);
1644        inode->i_size = size;
1645        inode->i_nlink = 0;     /* It is unlinked */
1646        file->f_vfsmnt = mntget(shm_mnt);
1647        file->f_dentry = dentry;
1648        file->f_op = &shmem_file_operations;
1649        file->f_mode = FMODE_WRITE | FMODE_READ;
1650        return file;
1651
1652close_file:
1653        put_filp(file);
1654put_dentry:
1655        dput(dentry);
1656        return ERR_PTR(error);
1657}
1658
1659/*
1660 * shmem_zero_setup - setup a shared anonymous mapping
1661 *
1662 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1663 */
1664int shmem_zero_setup(struct vm_area_struct *vma)
1665{
1666        struct file *file;
1667        loff_t size = vma->vm_end - vma->vm_start;
1668
1669        file = shmem_file_setup("dev/zero", size);
1670        if (IS_ERR(file))
1671                return PTR_ERR(file);
1672
1673        if (vma->vm_file)
1674                fput(vma->vm_file);
1675        vma->vm_file = file;
1676        vma->vm_ops = &shmem_vm_ops;
1677        return 0;
1678}
1679
1680EXPORT_SYMBOL(shmem_file_setup);
1681
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.