linux-bk/mm/shmem.c
<<
>>
Prefs
   1/*
   2 * Resizable virtual memory filesystem for Linux.
   3 *
   4 * Copyright (C) 2000 Linus Torvalds.
   5 *               2000 Transmeta Corp.
   6 *               2000-2001 Christoph Rohland
   7 *               2000-2001 SAP AG
   8 *               2002 Red Hat Inc.
   9 *
  10 * This file is released under the GPL.
  11 */
  12
  13/*
  14 * This virtual memory filesystem is heavily based on the ramfs. It
  15 * extends ramfs by the ability to use swap and honor resource limits
  16 * which makes it a completely usable filesystem.
  17 */
  18
  19#include <linux/config.h>
  20#include <linux/module.h>
  21#include <linux/init.h>
  22#include <linux/devfs_fs_kernel.h>
  23#include <linux/fs.h>
  24#include <linux/mm.h>
  25#include <linux/mman.h>
  26#include <linux/file.h>
  27#include <linux/swap.h>
  28#include <linux/pagemap.h>
  29#include <linux/string.h>
  30#include <linux/slab.h>
  31#include <linux/backing-dev.h>
  32#include <linux/shmem_fs.h>
  33
  34#include <asm/uaccess.h>
  35
  36/* This magic number is used in glibc for posix shared memory */
  37#define TMPFS_MAGIC     0x01021994
  38
  39#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
  40#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
  41
  42#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1))
  43#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
  44
  45#define VM_ACCT(size)    (((size) + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT)
  46
  47static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  48{
  49        return sb->u.generic_sbp;
  50}
  51
  52static struct super_operations shmem_ops;
  53static struct address_space_operations shmem_aops;
  54static struct file_operations shmem_file_operations;
  55static struct inode_operations shmem_inode_operations;
  56static struct inode_operations shmem_dir_inode_operations;
  57static struct vm_operations_struct shmem_vm_ops;
  58
  59static struct backing_dev_info shmem_backing_dev_info = {
  60        .ra_pages       = 0,    /* No readahead */
  61        .memory_backed  = 1,    /* Does not contribute to dirty memory */
  62};
  63
  64LIST_HEAD (shmem_inodes);
  65static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
  66atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
  67
  68static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long);
  69
  70/*
  71 * shmem_recalc_inode - recalculate the size of an inode
  72 *
  73 * @inode: inode to recalc
  74 * @swap:  additional swap pages freed externally
  75 *
  76 * We have to calculate the free blocks since the mm can drop pages
  77 * behind our back
  78 *
  79 * But we know that normally
  80 * inodes->i_blocks/BLOCKS_PER_PAGE == 
  81 *                      inode->i_mapping->nrpages + info->swapped
  82 *
  83 * So the mm freed 
  84 * inodes->i_blocks/BLOCKS_PER_PAGE - 
  85 *                      (inode->i_mapping->nrpages + info->swapped)
  86 *
  87 * It has to be called with the spinlock held.
  88 */
  89
  90static void shmem_recalc_inode(struct inode * inode)
  91{
  92        unsigned long freed;
  93
  94        freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
  95                (inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
  96        if (freed){
  97                struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
  98                inode->i_blocks -= freed*BLOCKS_PER_PAGE;
  99                spin_lock (&sbinfo->stat_lock);
 100                sbinfo->free_blocks += freed;
 101                spin_unlock (&sbinfo->stat_lock);
 102        }
 103}
 104
 105/*
 106 * shmem_swp_entry - find the swap vector position in the info structure
 107 *
 108 * @info:  info structure for the inode
 109 * @index: index of the page to find
 110 * @page:  optional page to add to the structure. Has to be preset to
 111 *         all zeros
 112 *
 113 * If there is no space allocated yet it will return -ENOMEM when
 114 * page == 0 else it will use the page for the needed block.
 115 *
 116 * returns -EFBIG if the index is too big.
 117 *
 118 *
 119 * The swap vector is organized the following way:
 120 *
 121 * There are SHMEM_NR_DIRECT entries directly stored in the
 122 * shmem_inode_info structure. So small files do not need an addional
 123 * allocation.
 124 *
 125 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 126 * i_indirect which points to a page which holds in the first half
 127 * doubly indirect blocks, in the second half triple indirect blocks:
 128 *
 129 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 130 * following layout (for SHMEM_NR_DIRECT == 16):
 131 *
 132 * i_indirect -> dir --> 16-19
 133 *            |      +-> 20-23
 134 *            |
 135 *            +-->dir2 --> 24-27
 136 *            |        +-> 28-31
 137 *            |        +-> 32-35
 138 *            |        +-> 36-39
 139 *            |
 140 *            +-->dir3 --> 40-43
 141 *                     +-> 44-47
 142 *                     +-> 48-51
 143 *                     +-> 52-55
 144 */
 145static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page) 
 146{
 147        unsigned long offset;
 148        void **dir;
 149
 150        if (index < SHMEM_NR_DIRECT)
 151                return info->i_direct+index;
 152
 153        index -= SHMEM_NR_DIRECT;
 154        offset = index % ENTRIES_PER_PAGE;
 155        index /= ENTRIES_PER_PAGE;
 156
 157        if (!info->i_indirect) {
 158                info->i_indirect = (void *) page;
 159                return ERR_PTR(-ENOMEM);
 160        }
 161
 162        dir = info->i_indirect + index;
 163        if (index >= ENTRIES_PER_PAGE/2) {
 164                index -= ENTRIES_PER_PAGE/2;
 165                dir = info->i_indirect + ENTRIES_PER_PAGE/2 
 166                        + index/ENTRIES_PER_PAGE;
 167                index %= ENTRIES_PER_PAGE;
 168
 169                if(!*dir) {
 170                        *dir = (void *) page;
 171                        /* We return since we will need another page
 172                           in the next step */
 173                        return ERR_PTR(-ENOMEM);
 174                }
 175                dir = ((void **)*dir) + index;
 176        }
 177        if (!*dir) {
 178                if (!page)
 179                        return ERR_PTR(-ENOMEM);
 180                *dir = (void *)page;
 181        }
 182        return ((swp_entry_t *)*dir) + offset;
 183}
 184
 185/*
 186 * shmem_alloc_entry - get the position of the swap entry for the
 187 *                     page. If it does not exist allocate the entry
 188 *
 189 * @info:       info structure for the inode
 190 * @index:      index of the page to find
 191 */
 192static inline swp_entry_t * shmem_alloc_entry (struct shmem_inode_info *info, unsigned long index)
 193{
 194        unsigned long page = 0;
 195        swp_entry_t * res;
 196
 197        if (index >= SHMEM_MAX_INDEX)
 198                return ERR_PTR(-EFBIG);
 199
 200        if (info->next_index <= index)
 201                info->next_index = index + 1;
 202
 203        while ((res = shmem_swp_entry(info,index,page)) == ERR_PTR(-ENOMEM)) {
 204                page = get_zeroed_page(GFP_USER);
 205                if (!page)
 206                        break;
 207        }
 208        return res;
 209}
 210
 211/*
 212 * shmem_free_swp - free some swap entries in a directory
 213 *
 214 * @dir:   pointer to the directory
 215 * @count: number of entries to scan
 216 */
 217static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
 218{
 219        swp_entry_t *ptr, entry;
 220        int freed = 0;
 221
 222        for (ptr = dir; ptr < dir + count; ptr++) {
 223                if (!ptr->val)
 224                        continue;
 225                entry = *ptr;
 226                *ptr = (swp_entry_t){0};
 227                freed++;
 228                free_swap_and_cache(entry);
 229        }
 230        return freed;
 231}
 232
 233/*
 234 * shmem_truncate_direct - free the swap entries of a whole doubly
 235 *                         indirect block
 236 *
 237 * @dir:        pointer to the pointer to the block
 238 * @start:      offset to start from (in pages)
 239 * @len:        how many pages are stored in this block
 240 *
 241 * Returns the number of freed swap entries.
 242 */
 243
 244static inline unsigned long 
 245shmem_truncate_direct(swp_entry_t *** dir, unsigned long start, unsigned long len) {
 246        swp_entry_t **last, **ptr;
 247        unsigned long off, freed = 0;
 248 
 249        if (!*dir)
 250                return 0;
 251
 252        last = *dir + (len + ENTRIES_PER_PAGE-1) / ENTRIES_PER_PAGE;
 253        off = start % ENTRIES_PER_PAGE;
 254
 255        for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++) {
 256                if (!*ptr) {
 257                        off = 0;
 258                        continue;
 259                }
 260
 261                if (!off) {
 262                        freed += shmem_free_swp(*ptr, ENTRIES_PER_PAGE);
 263                        free_page ((unsigned long) *ptr);
 264                        *ptr = 0;
 265                } else {
 266                        freed += shmem_free_swp(*ptr+off,ENTRIES_PER_PAGE-off);
 267                        off = 0;
 268                }
 269        }
 270        
 271        if (!start) {
 272                free_page((unsigned long) *dir);
 273                *dir = 0;
 274        }
 275        return freed;
 276}
 277
 278/*
 279 * shmem_truncate_indirect - truncate an inode
 280 *
 281 * @info:  the info structure of the inode
 282 * @index: the index to truncate
 283 *
 284 * This function locates the last doubly indirect block and calls
 285 * then shmem_truncate_direct to do the real work
 286 */
 287static inline unsigned long
 288shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
 289{
 290        swp_entry_t ***base;
 291        unsigned long baseidx, len, start;
 292        unsigned long max = info->next_index-1;
 293
 294        if (max < SHMEM_NR_DIRECT) {
 295                info->next_index = index;
 296                return shmem_free_swp(info->i_direct + index,
 297                                      SHMEM_NR_DIRECT - index);
 298        }
 299
 300        if (max < ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2 + SHMEM_NR_DIRECT) {
 301                max -= SHMEM_NR_DIRECT;
 302                base = (swp_entry_t ***) &info->i_indirect;
 303                baseidx = SHMEM_NR_DIRECT;
 304                len = max+1;
 305        } else {
 306                max -= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2+SHMEM_NR_DIRECT;
 307                if (max >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2)
 308                        BUG();
 309
 310                baseidx = max & ~(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE-1);
 311                base = (swp_entry_t ***) info->i_indirect + ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGE/ENTRIES_PER_PAGE ;
 312                len = max - baseidx + 1;
 313                baseidx += ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2+SHMEM_NR_DIRECT;
 314        }
 315
 316        if (index > baseidx) {
 317                info->next_index = index;
 318                start = index - baseidx;
 319        } else {
 320                info->next_index = baseidx;
 321                start = 0;
 322        }
 323        return shmem_truncate_direct(base, start, len);
 324}
 325
 326static void shmem_truncate (struct inode * inode)
 327{
 328        unsigned long index;
 329        unsigned long partial;
 330        unsigned long freed = 0;
 331        struct shmem_inode_info * info = SHMEM_I(inode);
 332
 333        down(&info->sem);
 334        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 335        spin_lock (&info->lock);
 336        index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 337        partial = inode->i_size & ~PAGE_CACHE_MASK;
 338
 339        if (partial) {
 340                swp_entry_t *entry = shmem_swp_entry(info, index-1, 0);
 341                struct page *page;
 342                /*
 343                 * This check is racy: it's faintly possible that page
 344                 * was assigned to swap during truncate_inode_pages,
 345                 * and now assigned to file; but better than nothing.
 346                 */
 347                if (!IS_ERR(entry) && entry->val) {
 348                        spin_unlock(&info->lock);
 349                        page = shmem_getpage_locked(info, inode, index-1);
 350                        if (!IS_ERR(page)) {
 351                                memclear_highpage_flush(page, partial,
 352                                        PAGE_CACHE_SIZE - partial);
 353                                unlock_page(page);
 354                                page_cache_release(page);
 355                        }
 356                        spin_lock(&info->lock);
 357                }
 358        }
 359
 360        while (index < info->next_index) 
 361                freed += shmem_truncate_indirect(info, index);
 362
 363        info->swapped -= freed;
 364        shmem_recalc_inode(inode);
 365        spin_unlock (&info->lock);
 366        up(&info->sem);
 367}
 368
 369static int shmem_notify_change(struct dentry * dentry, struct iattr *attr)
 370{
 371        struct inode *inode = dentry->d_inode;
 372        long change = 0;
 373        int error;
 374
 375        if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size <= SHMEM_MAX_BYTES)) {
 376                /*
 377                 * Account swap file usage based on new file size,
 378                 * but just let vmtruncate fail on out-of-range sizes.
 379                 */
 380                change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size);
 381                if (change > 0) {
 382                        if (!vm_enough_memory(change))
 383                                return -ENOMEM;
 384                } else
 385                        vm_unacct_memory(-change);
 386        }
 387
 388        error = inode_change_ok(inode, attr);
 389        if (!error)
 390                error = inode_setattr(inode, attr);
 391        if (error)
 392                vm_unacct_memory(change);
 393        return error;
 394}
 395
 396
 397static void shmem_delete_inode(struct inode * inode)
 398{
 399        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 400        struct shmem_inode_info *info = SHMEM_I(inode);
 401
 402        if (inode->i_op->truncate == shmem_truncate) {
 403                spin_lock(&shmem_ilock);
 404                list_del(&info->list);
 405                spin_unlock(&shmem_ilock);
 406                if (info->flags & VM_ACCOUNT)
 407                        vm_unacct_memory(VM_ACCT(inode->i_size));
 408                inode->i_size = 0;
 409                shmem_truncate (inode);
 410        }
 411        spin_lock (&sbinfo->stat_lock);
 412        sbinfo->free_inodes++;
 413        spin_unlock (&sbinfo->stat_lock);
 414        clear_inode(inode);
 415}
 416
 417static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr)
 418{
 419        swp_entry_t *test;
 420
 421        for (test = ptr; test < eptr; test++) {
 422                if (test->val == entry.val)
 423                        return test - ptr;
 424        }
 425        return -1;
 426}
 427
 428static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 429{
 430        swp_entry_t *ptr;
 431        unsigned long idx;
 432        int offset;
 433        struct inode *inode;
 434
 435        idx = 0;
 436        ptr = info->i_direct;
 437        spin_lock (&info->lock);
 438        offset = info->next_index;
 439        if (offset > SHMEM_NR_DIRECT)
 440                offset = SHMEM_NR_DIRECT;
 441        offset = shmem_find_swp(entry, ptr, ptr + offset);
 442        if (offset >= 0)
 443                goto found;
 444
 445        for (idx = SHMEM_NR_DIRECT; idx < info->next_index; 
 446             idx += ENTRIES_PER_PAGE) {
 447                ptr = shmem_swp_entry(info, idx, 0);
 448                if (IS_ERR(ptr))
 449                        continue;
 450                offset = info->next_index - idx;
 451                if (offset > ENTRIES_PER_PAGE)
 452                        offset = ENTRIES_PER_PAGE;
 453                offset = shmem_find_swp(entry, ptr, ptr + offset);
 454                if (offset >= 0)
 455                        goto found;
 456        }
 457        spin_unlock (&info->lock);
 458        return 0;
 459found:
 460        idx += offset;
 461        inode = igrab(&info->vfs_inode);
 462        /* move head to start search for next from here */
 463        list_move_tail(&shmem_inodes, &info->list);
 464        spin_unlock(&shmem_ilock);
 465        swap_free(entry);
 466        ptr[offset] = (swp_entry_t) {0};
 467
 468        while (inode && move_from_swap_cache(page, idx, inode->i_mapping)) {
 469                /*
 470                 * Yield for kswapd, and try again - but we're still
 471                 * holding the page lock - ugh! fix this up later on.
 472                 * Beware of inode being unlinked or truncated: just
 473                 * leave try_to_unuse to delete_from_swap_cache if so.
 474                 */
 475                spin_unlock(&info->lock);
 476                yield();
 477                spin_lock(&info->lock);
 478                ptr = shmem_swp_entry(info, idx, 0);
 479                if (IS_ERR(ptr))
 480                        break;
 481        }
 482
 483        info->swapped--;
 484        SetPageUptodate(page);
 485        spin_unlock(&info->lock);
 486        if (inode)
 487                iput(inode);
 488        return 1;
 489}
 490
 491/*
 492 * shmem_unuse() search for an eventually swapped out shmem page.
 493 * Note shmem_unuse_inode drops shmem_ilock itself if successful.
 494 */
 495void shmem_unuse(swp_entry_t entry, struct page *page)
 496{
 497        struct list_head *p;
 498        struct shmem_inode_info * info;
 499
 500        spin_lock (&shmem_ilock);
 501        list_for_each(p, &shmem_inodes) {
 502                info = list_entry(p, struct shmem_inode_info, list);
 503
 504                if (info->swapped && shmem_unuse_inode(info, entry, page))
 505                        return;
 506        }
 507        spin_unlock (&shmem_ilock);
 508}
 509
 510/*
 511 * Move the page from the page cache to the swap cache.
 512 *
 513 * The page lock prevents multiple occurences of shmem_writepage at
 514 * once.  We still need to guard against racing with
 515 * shmem_getpage_locked().  
 516 */
 517static int shmem_writepage(struct page * page)
 518{
 519        int err;
 520        struct shmem_inode_info *info;
 521        swp_entry_t *entry, swap;
 522        struct address_space *mapping;
 523        unsigned long index;
 524        struct inode *inode;
 525
 526        if (!PageLocked(page))
 527                BUG();
 528
 529        if (!(current->flags & PF_MEMALLOC))
 530                return fail_writepage(page);
 531
 532        mapping = page->mapping;
 533        index = page->index;
 534        inode = mapping->host;
 535        info = SHMEM_I(inode);
 536        if (info->flags & VM_LOCKED)
 537                return fail_writepage(page);
 538        swap = get_swap_page();
 539        if (!swap.val)
 540                return fail_writepage(page);
 541
 542        spin_lock(&info->lock);
 543        entry = shmem_swp_entry(info, index, 0);
 544        if (IS_ERR(entry))      /* this had been allocated on page allocation */
 545                BUG();
 546        shmem_recalc_inode(inode);
 547        if (entry->val)
 548                BUG();
 549
 550        err = move_to_swap_cache(page, swap);
 551        if (!err) {
 552                *entry = swap;
 553                info->swapped++;
 554                spin_unlock(&info->lock);
 555                SetPageUptodate(page);
 556                set_page_dirty(page);
 557                unlock_page(page);
 558                return 0;
 559        }
 560
 561        spin_unlock(&info->lock);
 562        swap_free(swap);
 563        return fail_writepage(page);
 564}
 565
 566/*
 567 * shmem_getpage_locked - either get the page from swap or allocate a new one
 568 *
 569 * If we allocate a new one we do not mark it dirty. That's up to the
 570 * vm. If we swap it in we mark it dirty since we also free the swap
 571 * entry since a page cannot live in both the swap and page cache
 572 *
 573 * Called with the inode locked, so it cannot race with itself, but we
 574 * still need to guard against racing with shm_writepage(), which might
 575 * be trying to move the page to the swap cache as we run.
 576 */
 577static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx)
 578{
 579        struct address_space *mapping = inode->i_mapping;
 580        struct shmem_sb_info *sbinfo;
 581        struct page *page;
 582        swp_entry_t *entry;
 583        int error;
 584
 585repeat:
 586        page = find_lock_page(mapping, idx);
 587        if (page)
 588                return page;
 589
 590        entry = shmem_alloc_entry (info, idx);
 591        if (IS_ERR(entry))
 592                return (void *)entry;
 593
 594        spin_lock (&info->lock);
 595        
 596        /* The shmem_alloc_entry() call may have blocked, and
 597         * shmem_writepage may have been moving a page between the page
 598         * cache and swap cache.  We need to recheck the page cache
 599         * under the protection of the info->lock spinlock. */
 600
 601        page = find_get_page(mapping, idx);
 602        if (page) {
 603                if (TestSetPageLocked(page))
 604                        goto wait_retry;
 605                spin_unlock (&info->lock);
 606                return page;
 607        }
 608        
 609        shmem_recalc_inode(inode);
 610        if (entry->val) {
 611                /* Look it up and read it in.. */
 612                page = find_get_page(&swapper_space, entry->val);
 613                if (!page) {
 614                        swp_entry_t swap = *entry;
 615                        spin_unlock (&info->lock);
 616                        swapin_readahead(*entry);
 617                        page = read_swap_cache_async(*entry);
 618                        if (!page) {
 619                                if (entry->val != swap.val)
 620                                        goto repeat;
 621                                return ERR_PTR(-ENOMEM);
 622                        }
 623                        wait_on_page_locked(page);
 624                        if (!PageUptodate(page) && entry->val == swap.val) {
 625                                page_cache_release(page);
 626                                return ERR_PTR(-EIO);
 627                        }
 628                        
 629                        /* Too bad we can't trust this page, because we
 630                         * dropped the info->lock spinlock */
 631                        page_cache_release(page);
 632                        goto repeat;
 633                }
 634
 635                /* We have to do this with page locked to prevent races */
 636                if (TestSetPageLocked(page))
 637                        goto wait_retry;
 638                if (PageWriteback(page)) {
 639                        spin_unlock(&info->lock);
 640                        wait_on_page_writeback(page);
 641                        unlock_page(page);
 642                        page_cache_release(page);
 643                        goto repeat;
 644                }
 645                error = move_from_swap_cache(page, idx, mapping);
 646                if (error < 0) {
 647                        spin_unlock(&info->lock);
 648                        unlock_page(page);
 649                        page_cache_release(page);
 650                        return ERR_PTR(error);
 651                }
 652
 653                swap_free(*entry);
 654                *entry = (swp_entry_t) {0};
 655                info->swapped--;
 656                spin_unlock (&info->lock);
 657        } else {
 658                sbinfo = SHMEM_SB(inode->i_sb);
 659                spin_unlock (&info->lock);
 660                spin_lock (&sbinfo->stat_lock);
 661                if (sbinfo->free_blocks == 0)
 662                        goto no_space;
 663                sbinfo->free_blocks--;
 664                spin_unlock (&sbinfo->stat_lock);
 665
 666                /* Ok, get a new page.  We don't have to worry about the
 667                 * info->lock spinlock here: we cannot race against
 668                 * shm_writepage because we have already verified that
 669                 * there is no page present either in memory or in the
 670                 * swap cache, so we are guaranteed to be populating a
 671                 * new shm entry.  The inode semaphore we already hold
 672                 * is enough to make this atomic. */
 673                page = page_cache_alloc(mapping);
 674                if (!page)
 675                        goto no_mem;
 676                error = add_to_page_cache_lru(page, mapping, idx);
 677                if (error < 0) {
 678                        page_cache_release(page);
 679                        goto no_mem;
 680                }
 681                clear_highpage(page);
 682                inode->i_blocks += BLOCKS_PER_PAGE;
 683        }
 684
 685        /* We have the page */
 686        SetPageUptodate(page);
 687        return page;
 688
 689no_mem:
 690        spin_lock(&sbinfo->stat_lock);
 691        sbinfo->free_blocks++;
 692        spin_unlock(&sbinfo->stat_lock);
 693        return ERR_PTR(-ENOMEM);
 694
 695no_space:
 696        spin_unlock (&sbinfo->stat_lock);
 697        return ERR_PTR(-ENOSPC);
 698
 699wait_retry:
 700        spin_unlock(&info->lock);
 701        wait_on_page_locked(page);
 702        page_cache_release(page);
 703        goto repeat;
 704}
 705
 706static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 707{
 708        struct shmem_inode_info *info = SHMEM_I(inode);
 709        int error;
 710
 711        down (&info->sem);
 712        *ptr = ERR_PTR(-EFAULT);
 713        if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
 714                goto failed;
 715
 716        *ptr = shmem_getpage_locked(info, inode, idx);
 717        if (IS_ERR (*ptr))
 718                goto failed;
 719
 720        unlock_page(*ptr);
 721        up (&info->sem);
 722        return 0;
 723failed:
 724        up (&info->sem);
 725        error = PTR_ERR(*ptr);
 726        *ptr = NOPAGE_SIGBUS;
 727        if (error == -ENOMEM)
 728                *ptr = NOPAGE_OOM;
 729        return error;
 730}
 731
 732struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused)
 733{
 734        struct page * page;
 735        unsigned int idx;
 736        struct inode * inode = vma->vm_file->f_dentry->d_inode;
 737
 738        idx = (address - vma->vm_start) >> PAGE_CACHE_SHIFT;
 739        idx += vma->vm_pgoff;
 740
 741        if (shmem_getpage(inode, idx, &page))
 742                return page;
 743
 744        flush_page_to_ram(page);
 745        return(page);
 746}
 747
 748void shmem_lock(struct file * file, int lock)
 749{
 750        struct inode * inode = file->f_dentry->d_inode;
 751        struct shmem_inode_info * info = SHMEM_I(inode);
 752
 753        spin_lock(&info->lock);
 754        if (lock)
 755                info->flags |= VM_LOCKED;
 756        else
 757                info->flags &= ~VM_LOCKED;
 758        spin_unlock(&info->lock);
 759}
 760
 761static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
 762{
 763        struct vm_operations_struct * ops;
 764        struct inode *inode = file->f_dentry->d_inode;
 765
 766        ops = &shmem_vm_ops;
 767        if (!inode->i_sb || !S_ISREG(inode->i_mode))
 768                return -EACCES;
 769        UPDATE_ATIME(inode);
 770        vma->vm_ops = ops;
 771        return 0;
 772}
 773
 774struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 775{
 776        struct inode * inode;
 777        struct shmem_inode_info *info;
 778        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 779
 780        spin_lock (&sbinfo->stat_lock);
 781        if (!sbinfo->free_inodes) {
 782                spin_unlock (&sbinfo->stat_lock);
 783                return NULL;
 784        }
 785        sbinfo->free_inodes--;
 786        spin_unlock (&sbinfo->stat_lock);
 787
 788        inode = new_inode(sb);
 789        if (inode) {
 790                inode->i_mode = mode;
 791                inode->i_uid = current->fsuid;
 792                inode->i_gid = current->fsgid;
 793                inode->i_blksize = PAGE_CACHE_SIZE;
 794                inode->i_blocks = 0;
 795                inode->i_rdev = NODEV;
 796                inode->i_mapping->a_ops = &shmem_aops;
 797                inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
 798                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 799                info = SHMEM_I(inode);
 800                spin_lock_init (&info->lock);
 801                sema_init (&info->sem, 1);
 802                info->next_index = 0;
 803                memset (info->i_direct, 0, sizeof(info->i_direct));
 804                info->i_indirect = NULL;
 805                info->swapped = 0;
 806                info->flags = VM_ACCOUNT;
 807                switch (mode & S_IFMT) {
 808                default:
 809                        init_special_inode(inode, mode, dev);
 810                        break;
 811                case S_IFREG:
 812                        inode->i_op = &shmem_inode_operations;
 813                        inode->i_fop = &shmem_file_operations;
 814                        spin_lock (&shmem_ilock);
 815                        list_add_tail(&SHMEM_I(inode)->list, &shmem_inodes);
 816                        spin_unlock (&shmem_ilock);
 817                        break;
 818                case S_IFDIR:
 819                        inode->i_nlink++;
 820                        inode->i_op = &shmem_dir_inode_operations;
 821                        inode->i_fop = &simple_dir_operations;
 822                        break;
 823                case S_IFLNK:
 824                        break;
 825                }
 826        }
 827        return inode;
 828}
 829
 830static int shmem_set_size(struct shmem_sb_info *info,
 831                          unsigned long max_blocks, unsigned long max_inodes)
 832{
 833        int error;
 834        unsigned long blocks, inodes;
 835
 836        spin_lock(&info->stat_lock);
 837        blocks = info->max_blocks - info->free_blocks;
 838        inodes = info->max_inodes - info->free_inodes;
 839        error = -EINVAL;
 840        if (max_blocks < blocks)
 841                goto out;
 842        if (max_inodes < inodes)
 843                goto out;
 844        error = 0;
 845        info->max_blocks  = max_blocks;
 846        info->free_blocks = max_blocks - blocks;
 847        info->max_inodes  = max_inodes;
 848        info->free_inodes = max_inodes - inodes;
 849out:
 850        spin_unlock(&info->stat_lock);
 851        return error;
 852}
 853
 854#ifdef CONFIG_TMPFS
 855
 856static struct inode_operations shmem_symlink_inode_operations;
 857static struct inode_operations shmem_symlink_inline_operations;
 858
 859static ssize_t
 860shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 861{
 862        struct inode    *inode = file->f_dentry->d_inode; 
 863        struct shmem_inode_info *info;
 864        unsigned long   limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 865        loff_t          pos;
 866        struct page     *page;
 867        unsigned long   written;
 868        long            status;
 869        int             err;
 870        loff_t          maxpos;
 871
 872        if ((ssize_t) count < 0)
 873                return -EINVAL;
 874
 875        if (!access_ok(VERIFY_READ, buf, count))
 876                return -EFAULT;
 877
 878        down(&inode->i_sem);
 879
 880        pos = *ppos;
 881        err = -EINVAL;
 882        if (pos < 0)
 883                goto out_nc;
 884
 885        err = file->f_error;
 886        if (err) {
 887                file->f_error = 0;
 888                goto out_nc;
 889        }
 890
 891        written = 0;
 892
 893        if (file->f_flags & O_APPEND)
 894                pos = inode->i_size;
 895
 896        maxpos = inode->i_size;
 897        if (pos + count > inode->i_size) {
 898                maxpos = pos + count;
 899                if (maxpos > SHMEM_MAX_BYTES)
 900                        maxpos = SHMEM_MAX_BYTES;
 901                if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
 902                        err = -ENOMEM;
 903                        goto out_nc;
 904                }
 905        }
 906
 907        /*
 908         * Check whether we've reached the file size limit.
 909         */
 910        err = -EFBIG;
 911        if (limit != RLIM_INFINITY) {
 912                if (pos >= limit) {
 913                        send_sig(SIGXFSZ, current, 0);
 914                        goto out;
 915                }
 916                if (count > limit - pos) {
 917                        send_sig(SIGXFSZ, current, 0);
 918                        count = limit - pos;
 919                }
 920        }
 921
 922        status  = 0;
 923        if (count) {
 924                remove_suid(file->f_dentry);
 925                inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 926        }
 927
 928        while (count) {
 929                unsigned long bytes, index, offset;
 930                char *kaddr;
 931
 932                /*
 933                 * Try to find the page in the cache. If it isn't there,
 934                 * allocate a free page.
 935                 */
 936                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 937                index = pos >> PAGE_CACHE_SHIFT;
 938                bytes = PAGE_CACHE_SIZE - offset;
 939                if (bytes > count) {
 940                        bytes = count;
 941                }
 942
 943                /*
 944                 * Bring in the user page that we will copy from _first_.
 945                 * Otherwise there's a nasty deadlock on copying from the
 946                 * same page as we're writing to, without it being marked
 947                 * up-to-date.
 948                 */
 949                { volatile unsigned char dummy;
 950                        __get_user(dummy, buf);
 951                        __get_user(dummy, buf+bytes-1);
 952                }
 953
 954                info = SHMEM_I(inode);
 955                down (&info->sem);
 956                page = shmem_getpage_locked(info, inode, index);
 957                up (&info->sem);
 958
 959                status = PTR_ERR(page);
 960                if (IS_ERR(page))
 961                        break;
 962
 963                /* We have exclusive IO access to the page.. */
 964                if (!PageLocked(page)) {
 965                        PAGE_BUG(page);
 966                }
 967
 968                kaddr = kmap(page);
 969                status = copy_from_user(kaddr+offset, buf, bytes);
 970                kunmap(page);
 971                if (status)
 972                        goto fail_write;
 973
 974                flush_dcache_page(page);
 975                if (bytes > 0) {
 976                        set_page_dirty(page);
 977                        written += bytes;
 978                        count -= bytes;
 979                        pos += bytes;
 980                        buf += bytes;
 981                        if (pos > inode->i_size) 
 982                                inode->i_size = pos;
 983                }
 984unlock:
 985                /* Mark it unlocked again and drop the page.. */
 986                unlock_page(page);
 987                page_cache_release(page);
 988
 989                if (status < 0)
 990                        break;
 991        }
 992        *ppos = pos;
 993
 994        err = written ? written : status;
 995out:
 996        /* Short writes give back address space */
 997        if (inode->i_size != maxpos)
 998                vm_unacct_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size));
 999out_nc:
1000        up(&inode->i_sem);
1001        return err;
1002fail_write:
1003        status = -EFAULT;
1004        ClearPageUptodate(page);
1005        goto unlock;
1006}
1007
1008static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
1009{
1010        struct inode *inode = filp->f_dentry->d_inode;
1011        struct address_space *mapping = inode->i_mapping;
1012        unsigned long index, offset;
1013        int nr = 1;
1014
1015        index = *ppos >> PAGE_CACHE_SHIFT;
1016        offset = *ppos & ~PAGE_CACHE_MASK;
1017
1018        while (nr && desc->count) {
1019                struct page *page;
1020                unsigned long end_index, nr;
1021
1022                end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1023                if (index > end_index)
1024                        break;
1025                nr = PAGE_CACHE_SIZE;
1026                if (index == end_index) {
1027                        nr = inode->i_size & ~PAGE_CACHE_MASK;
1028                        if (nr <= offset)
1029                                break;
1030                }
1031
1032                nr = nr - offset;
1033
1034                if ((desc->error = shmem_getpage(inode, index, &page)))
1035                        break;
1036
1037                if (!list_empty(&mapping->i_mmap_shared))
1038                        flush_dcache_page(page);
1039
1040                /*
1041                 * Ok, we have the page, and it's up-to-date, so
1042                 * now we can copy it to user space...
1043                 *
1044                 * The actor routine returns how many bytes were actually used..
1045                 * NOTE! This may not be the same as how much of a user buffer
1046                 * we filled up (we may be padding etc), so we can only update
1047                 * "pos" here (the actor routine has to update the user buffer
1048                 * pointers and the remaining count).
1049                 */
1050                nr = file_read_actor(desc, page, offset, nr);
1051                offset += nr;
1052                index += offset >> PAGE_CACHE_SHIFT;
1053                offset &= ~PAGE_CACHE_MASK;
1054        
1055                page_cache_release(page);
1056        }
1057
1058        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1059        UPDATE_ATIME(inode);
1060}
1061
1062static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
1063{
1064        ssize_t retval;
1065
1066        retval = -EFAULT;
1067        if (access_ok(VERIFY_WRITE, buf, count)) {
1068                retval = 0;
1069
1070                if (count) {
1071                        read_descriptor_t desc;
1072
1073                        desc.written = 0;
1074                        desc.count = count;
1075                        desc.buf = buf;
1076                        desc.error = 0;
1077                        do_shmem_file_read(filp, ppos, &desc);
1078
1079                        retval = desc.written;
1080                        if (!retval)
1081                                retval = desc.error;
1082                }
1083        }
1084        return retval;
1085}
1086
1087static int shmem_statfs(struct super_block *sb, struct statfs *buf)
1088{
1089        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1090
1091        buf->f_type = TMPFS_MAGIC;
1092        buf->f_bsize = PAGE_CACHE_SIZE;
1093        spin_lock (&sbinfo->stat_lock);
1094        buf->f_blocks = sbinfo->max_blocks;
1095        buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1096        buf->f_files = sbinfo->max_inodes;
1097        buf->f_ffree = sbinfo->free_inodes;
1098        spin_unlock (&sbinfo->stat_lock);
1099        buf->f_namelen = 255;
1100        return 0;
1101}
1102
1103/*
1104 * File creation. Allocate an inode, and we're done..
1105 */
1106static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1107{
1108        struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
1109        int error = -ENOSPC;
1110
1111        if (inode) {
1112                dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1113                d_instantiate(dentry, inode);
1114                dget(dentry); /* Extra count - pin the dentry in core */
1115                error = 0;
1116        }
1117        return error;
1118}
1119
1120static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1121{
1122        int error;
1123
1124        if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1125                return error;
1126        dir->i_nlink++;
1127        return 0;
1128}
1129
1130static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
1131{
1132        return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1133}
1134
1135/*
1136 * Link a file..
1137 */
1138static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
1139{
1140        struct inode *inode = old_dentry->d_inode;
1141
1142        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1143        inode->i_nlink++;
1144        atomic_inc(&inode->i_count);    /* New dentry reference */
1145        dget(dentry);           /* Extra pinning count for the created dentry */
1146        d_instantiate(dentry, inode);
1147        return 0;
1148}
1149
1150static inline int shmem_positive(struct dentry *dentry)
1151{
1152        return dentry->d_inode && !d_unhashed(dentry);
1153}
1154
1155/*
1156 * Check that a directory is empty (this works
1157 * for regular files too, they'll just always be
1158 * considered empty..).
1159 *
1160 * Note that an empty directory can still have
1161 * children, they just all have to be negative..
1162 */
1163static int shmem_empty(struct dentry *dentry)
1164{
1165        struct list_head *list;
1166
1167        spin_lock(&dcache_lock);
1168        list = dentry->d_subdirs.next;
1169
1170        while (list != &dentry->d_subdirs) {
1171                struct dentry *de = list_entry(list, struct dentry, d_child);
1172
1173                if (shmem_positive(de)) {
1174                        spin_unlock(&dcache_lock);
1175                        return 0;
1176                }
1177                list = list->next;
1178        }
1179        spin_unlock(&dcache_lock);
1180        return 1;
1181}
1182
1183static int shmem_unlink(struct inode * dir, struct dentry *dentry)
1184{
1185        struct inode *inode = dentry->d_inode;
1186        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1187        inode->i_nlink--;
1188        dput(dentry);   /* Undo the count from "create" - this does all the work */
1189        return 0;
1190}
1191
1192static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
1193{
1194        if (!shmem_empty(dentry))
1195                return -ENOTEMPTY;
1196
1197        dir->i_nlink--;
1198        return shmem_unlink(dir, dentry);
1199}
1200
1201/*
1202 * The VFS layer already does all the dentry stuff for rename,
1203 * we just have to decrement the usage count for the target if
1204 * it exists so that the VFS layer correctly free's it when it
1205 * gets overwritten.
1206 */
1207static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
1208{
1209        struct inode *inode;
1210
1211        if (!shmem_empty(new_dentry)) 
1212                return -ENOTEMPTY;
1213
1214        inode = new_dentry->d_inode;
1215        if (inode) {
1216                inode->i_ctime = CURRENT_TIME;
1217                inode->i_nlink--;
1218                dput(new_dentry);
1219        }
1220        inode = old_dentry->d_inode;
1221        if (S_ISDIR(inode->i_mode)) {
1222                old_dir->i_nlink--;
1223                new_dir->i_nlink++;
1224        }
1225
1226        inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1227        return 0;
1228}
1229
1230static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
1231{
1232        int len;
1233        struct inode *inode;
1234        struct page *page;
1235        char *kaddr;
1236        struct shmem_inode_info * info;
1237
1238        len = strlen(symname) + 1;
1239        if (len > PAGE_CACHE_SIZE)
1240                return -ENAMETOOLONG;
1241
1242        inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1243        if (!inode)
1244                return -ENOSPC;
1245
1246        info = SHMEM_I(inode);
1247        inode->i_size = len-1;
1248        if (len <= (char *)inode - (char *)info) {
1249                /* do it inline */
1250                memcpy(info, symname, len);
1251                inode->i_op = &shmem_symlink_inline_operations;
1252        } else {
1253                if (!vm_enough_memory(VM_ACCT(1))) {
1254                        iput(inode);
1255                        return -ENOMEM;
1256                }
1257                down(&info->sem);
1258                page = shmem_getpage_locked(info, inode, 0);
1259                if (IS_ERR(page)) {
1260                        up(&info->sem);
1261                        vm_unacct_memory(VM_ACCT(1));
1262                        iput(inode);
1263                        return PTR_ERR(page);
1264                }
1265                inode->i_op = &shmem_symlink_inode_operations;
1266                spin_lock (&shmem_ilock);
1267                list_add_tail(&info->list, &shmem_inodes);
1268                spin_unlock (&shmem_ilock);
1269                kaddr = kmap(page);
1270                memcpy(kaddr, symname, len);
1271                kunmap(page);
1272                set_page_dirty(page);
1273                unlock_page(page);
1274                page_cache_release(page);
1275                up(&info->sem);
1276        }
1277        dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1278        d_instantiate(dentry, inode);
1279        dget(dentry);
1280        return 0;
1281}
1282
1283static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
1284{
1285        return vfs_readlink(dentry,buffer,buflen, (const char *)SHMEM_I(dentry->d_inode));
1286}
1287
1288static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1289{
1290        return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1291}
1292
1293static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1294{
1295        struct page * page;
1296        int res = shmem_getpage(dentry->d_inode, 0, &page);
1297
1298        if (res)
1299                return res;
1300
1301        res = vfs_readlink(dentry,buffer,buflen, kmap(page));
1302        kunmap(page);
1303        page_cache_release(page);
1304        return res;
1305}
1306
1307static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1308{
1309        struct page * page;
1310        int res = shmem_getpage(dentry->d_inode, 0, &page);
1311        if (res)
1312                return res;
1313
1314        res = vfs_follow_link(nd, kmap(page));
1315        kunmap(page);
1316        page_cache_release(page);
1317        return res;
1318}
1319
1320static struct inode_operations shmem_symlink_inline_operations = {
1321        .readlink       = shmem_readlink_inline,
1322        .follow_link    = shmem_follow_link_inline,
1323};
1324
1325static struct inode_operations shmem_symlink_inode_operations = {
1326        .truncate       = shmem_truncate,
1327        .readlink       = shmem_readlink,
1328        .follow_link    = shmem_follow_link,
1329};
1330
1331static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes)
1332{
1333        char *this_char, *value, *rest;
1334
1335        while ((this_char = strsep(&options, ",")) != NULL) {
1336                if (!*this_char)
1337                        continue;
1338                if ((value = strchr(this_char,'=')) != NULL) {
1339                        *value++ = 0;
1340                } else {
1341                        printk(KERN_ERR 
1342                            "tmpfs: No value for mount option '%s'\n", 
1343                            this_char);
1344                        return 1;
1345                }
1346
1347                if (!strcmp(this_char,"size")) {
1348                        unsigned long long size;
1349                        size = memparse(value,&rest);
1350                        if (*rest)
1351                                goto bad_val;
1352                        *blocks = size >> PAGE_CACHE_SHIFT;
1353                } else if (!strcmp(this_char,"nr_blocks")) {
1354                        *blocks = memparse(value,&rest);
1355                        if (*rest)
1356                                goto bad_val;
1357                } else if (!strcmp(this_char,"nr_inodes")) {
1358                        *inodes = memparse(value,&rest);
1359                        if (*rest)
1360                                goto bad_val;
1361                } else if (!strcmp(this_char,"mode")) {
1362                        if (!mode)
1363                                continue;
1364                        *mode = simple_strtoul(value,&rest,8);
1365                        if (*rest)
1366                                goto bad_val;
1367                } else if (!strcmp(this_char,"uid")) {
1368                        if (!uid)
1369                                continue;
1370                        *uid = simple_strtoul(value,&rest,0);
1371                        if (*rest)
1372                                goto bad_val;
1373                } else if (!strcmp(this_char,"gid")) {
1374                        if (!gid)
1375                                continue;
1376                        *gid = simple_strtoul(value,&rest,0);
1377                        if (*rest)
1378                                goto bad_val;
1379                } else {
1380                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1381                               this_char);
1382                        return 1;
1383                }
1384        }
1385        return 0;
1386
1387bad_val:
1388        printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", 
1389               value, this_char);
1390        return 1;
1391
1392}
1393
1394static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
1395{
1396        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1397        unsigned long max_blocks = sbinfo->max_blocks;
1398        unsigned long max_inodes = sbinfo->max_inodes;
1399
1400        if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1401                return -EINVAL;
1402        return shmem_set_size(sbinfo, max_blocks, max_inodes);
1403}
1404
/* fsync() entry point: does nothing and reports success. */
int shmem_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
        return 0;
}
1409#endif
1410
1411static int shmem_fill_super(struct super_block * sb, void * data, int silent)
1412{
1413        struct inode * inode;
1414        struct dentry * root;
1415        unsigned long blocks, inodes;
1416        int mode   = S_IRWXUGO | S_ISVTX;
1417        uid_t uid = current->fsuid;
1418        gid_t gid = current->fsgid;
1419        struct shmem_sb_info *sbinfo;
1420        struct sysinfo si;
1421        int err;
1422
1423        sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1424        if (!sbinfo)
1425                return -ENOMEM;
1426        sb->u.generic_sbp = sbinfo;
1427        memset(sbinfo, 0, sizeof(struct shmem_sb_info));
1428
1429        /*
1430         * Per default we only allow half of the physical ram per
1431         * tmpfs instance
1432         */
1433        si_meminfo(&si);
1434        blocks = inodes = si.totalram / 2;
1435
1436#ifdef CONFIG_TMPFS
1437        if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes)) {
1438                err = -EINVAL;
1439                goto failed;
1440        }
1441#else
1442        sb->s_flags |= MS_NOUSER;
1443#endif
1444
1445        spin_lock_init (&sbinfo->stat_lock);
1446        sbinfo->max_blocks = blocks;
1447        sbinfo->free_blocks = blocks;
1448        sbinfo->max_inodes = inodes;
1449        sbinfo->free_inodes = inodes;
1450        sb->s_maxbytes = SHMEM_MAX_BYTES;
1451        sb->s_blocksize = PAGE_CACHE_SIZE;
1452        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1453        sb->s_magic = TMPFS_MAGIC;
1454        sb->s_op = &shmem_ops;
1455        inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1456        if (!inode) {
1457                err = -ENOMEM;
1458                goto failed;
1459        }
1460
1461        inode->i_uid = uid;
1462        inode->i_gid = gid;
1463        root = d_alloc_root(inode);
1464        if (!root) {
1465                err = -ENOMEM;
1466                goto failed_iput;
1467        }
1468        sb->s_root = root;
1469        return 0;
1470
1471failed_iput:
1472        iput(inode);
1473failed:
1474        kfree(sbinfo);
1475        sb->u.generic_sbp = NULL;
1476        return err;
1477}
1478
1479static void shmem_put_super(struct super_block *sb)
1480{
1481        kfree(sb->u.generic_sbp);
1482        sb->u.generic_sbp = NULL;
1483}
1484
1485static kmem_cache_t * shmem_inode_cachep;
1486
1487static struct inode *shmem_alloc_inode(struct super_block *sb)
1488{
1489        struct shmem_inode_info *p;
1490        p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1491        if (!p)
1492                return NULL;
1493        return &p->vfs_inode;
1494}
1495
1496static void shmem_destroy_inode(struct inode *inode)
1497{
1498        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1499}
1500
1501static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
1502{
1503        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1504
1505        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1506            SLAB_CTOR_CONSTRUCTOR) {
1507                inode_init_once(&p->vfs_inode);
1508        }
1509}
1510 
1511static int init_inodecache(void)
1512{
1513        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
1514                                             sizeof(struct shmem_inode_info),
1515                                             0, SLAB_HWCACHE_ALIGN,
1516                                             init_once, NULL);
1517        if (shmem_inode_cachep == NULL)
1518                return -ENOMEM;
1519        return 0;
1520}
1521
1522static void destroy_inodecache(void)
1523{
1524        if (kmem_cache_destroy(shmem_inode_cachep))
1525                printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
1526}
1527
1528static struct address_space_operations shmem_aops = {
1529        .writepage      = shmem_writepage,
1530        .set_page_dirty = __set_page_dirty_nobuffers,
1531};
1532
1533static struct file_operations shmem_file_operations = {
1534        .mmap           = shmem_mmap,
1535#ifdef CONFIG_TMPFS
1536        .read           = shmem_file_read,
1537        .write          = shmem_file_write,
1538        .fsync          = shmem_sync_file,
1539#endif
1540};
1541
1542static struct inode_operations shmem_inode_operations = {
1543        .truncate       = shmem_truncate,
1544        .setattr        = shmem_notify_change,
1545};
1546
1547static struct inode_operations shmem_dir_inode_operations = {
1548#ifdef CONFIG_TMPFS
1549        .create         = shmem_create,
1550        .lookup         = simple_lookup,
1551        .link           = shmem_link,
1552        .unlink         = shmem_unlink,
1553        .symlink        = shmem_symlink,
1554        .mkdir          = shmem_mkdir,
1555        .rmdir          = shmem_rmdir,
1556        .mknod          = shmem_mknod,
1557        .rename         = shmem_rename,
1558#endif
1559};
1560
1561static struct super_operations shmem_ops = {
1562        .alloc_inode    = shmem_alloc_inode,
1563        .destroy_inode  = shmem_destroy_inode,
1564#ifdef CONFIG_TMPFS
1565        .statfs         = shmem_statfs,
1566        .remount_fs     = shmem_remount_fs,
1567#endif
1568        .delete_inode   = shmem_delete_inode,
1569        .drop_inode     = generic_delete_inode,
1570        .put_super      = shmem_put_super,
1571};
1572
1573static struct vm_operations_struct shmem_vm_ops = {
1574        .nopage         = shmem_nopage,
1575};
1576
1577static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
1578        int flags, char *dev_name, void *data)
1579{
1580        return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
1581}
1582
#ifdef CONFIG_TMPFS
/*
 * Second filesystem type, only registered with CONFIG_TMPFS.
 * The original note here read: type "shm" will be tagged obsolete in 2.5.
 */
static struct file_system_type shmem_fs_type = {
        .name           = "shmem",
        .owner          = THIS_MODULE,
        .get_sb         = shmem_get_sb,
        .kill_sb        = kill_litter_super,
};
#endif
1592static struct file_system_type tmpfs_fs_type = {
1593        .owner          = THIS_MODULE,
1594        .name           = "tmpfs",
1595        .get_sb         = shmem_get_sb,
1596        .kill_sb        = kill_litter_super,
1597};
1598static struct vfsmount *shm_mnt;
1599
1600static int __init init_shmem_fs(void)
1601{
1602        int error;
1603        struct vfsmount * res;
1604
1605        error = init_inodecache();
1606        if (error)
1607                goto out3;
1608
1609        error = register_filesystem(&tmpfs_fs_type);
1610        if (error) {
1611                printk (KERN_ERR "Could not register tmpfs\n");
1612                goto out2;
1613        }
1614#ifdef CONFIG_TMPFS
1615        error = register_filesystem(&shmem_fs_type);
1616        if (error) {
1617                printk (KERN_ERR "Could not register shm fs\n");
1618                goto out1;
1619        }
1620        devfs_mk_dir (NULL, "shm", NULL);
1621#endif
1622        res = kern_mount(&tmpfs_fs_type);
1623        if (IS_ERR (res)) {
1624                error = PTR_ERR(res);
1625                printk (KERN_ERR "could not kern_mount tmpfs\n");
1626                goto out;
1627        }
1628        shm_mnt = res;
1629
1630        /* The internal instance should not do size checking */
1631        shmem_set_size(SHMEM_SB(res->mnt_sb), ULONG_MAX, ULONG_MAX);
1632        return 0;
1633
1634out:
1635#ifdef CONFIG_TMPFS
1636        unregister_filesystem(&shmem_fs_type);
1637out1:
1638#endif
1639        unregister_filesystem(&tmpfs_fs_type);
1640out2:
1641        destroy_inodecache();
1642out3:
1643        return error;
1644}
1645
1646static void __exit exit_shmem_fs(void)
1647{
1648#ifdef CONFIG_TMPFS
1649        unregister_filesystem(&shmem_fs_type);
1650#endif
1651        unregister_filesystem(&tmpfs_fs_type);
1652        mntput(shm_mnt);
1653        destroy_inodecache();
1654}
1655
1656module_init(init_shmem_fs)
1657module_exit(exit_shmem_fs)
1658
1659/*
1660 * shmem_file_setup - get an unlinked file living in shmem fs
1661 *
1662 * @name: name for dentry (to be seen in /proc/<pid>/maps
1663 * @size: size to be set for the file
1664 *
1665 */
1666struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags)
1667{
1668        int error;
1669        struct file *file;
1670        struct inode * inode;
1671        struct dentry *dentry, *root;
1672        struct qstr this;
1673
1674        if (size > SHMEM_MAX_BYTES)
1675                return ERR_PTR(-EINVAL);
1676
1677        if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size)))
1678                return ERR_PTR(-ENOMEM);
1679
1680        error = -ENOMEM;
1681        this.name = name;
1682        this.len = strlen(name);
1683        this.hash = 0; /* will go */
1684        root = shm_mnt->mnt_root;
1685        dentry = d_alloc(root, &this);
1686        if (!dentry)
1687                goto put_memory;
1688
1689        error = -ENFILE;
1690        file = get_empty_filp();
1691        if (!file)
1692                goto put_dentry;
1693
1694        error = -ENOSPC;
1695        inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1696        if (!inode) 
1697                goto close_file;
1698
1699        SHMEM_I(inode)->flags &= flags;
1700        d_instantiate(dentry, inode);
1701        inode->i_size = size;
1702        inode->i_nlink = 0;     /* It is unlinked */
1703        file->f_vfsmnt = mntget(shm_mnt);
1704        file->f_dentry = dentry;
1705        file->f_op = &shmem_file_operations;
1706        file->f_mode = FMODE_WRITE | FMODE_READ;
1707        return(file);
1708
1709close_file:
1710        put_filp(file);
1711put_dentry:
1712        dput (dentry);
1713put_memory:
1714        if (flags & VM_ACCOUNT)
1715                vm_unacct_memory(VM_ACCT(size));
1716        return ERR_PTR(error);  
1717}
1718
1719/*
1720 * shmem_zero_setup - setup a shared anonymous mapping
1721 *
1722 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1723 */
1724int shmem_zero_setup(struct vm_area_struct *vma)
1725{
1726        struct file *file;
1727        loff_t size = vma->vm_end - vma->vm_start;
1728        
1729        file = shmem_file_setup("dev/zero", size, vma->vm_flags);
1730        if (IS_ERR(file))
1731                return PTR_ERR(file);
1732
1733        if (vma->vm_file)
1734                fput (vma->vm_file);
1735        vma->vm_file = file;
1736        vma->vm_ops = &shmem_vm_ops;
1737        return 0;
1738}
1739
1740EXPORT_SYMBOL(shmem_file_setup);
1741
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.