linux-bk/mm/shmem.c
<<
>>
Prefs
   1/*
   2 * Resizable virtual memory filesystem for Linux.
   3 *
   4 * Copyright (C) 2000 Linus Torvalds.
   5 *               2000 Transmeta Corp.
   6 *               2000-2001 Christoph Rohland
   7 *               2000-2001 SAP AG
   8 *               2002 Red Hat Inc.
   9 * Copyright (C) 2002-2003 Hugh Dickins.
  10 * Copyright (C) 2002-2003 VERITAS Software Corporation.
  11 * Copyright (C) 2004 Andi Kleen, SuSE Labs
  12 *
  13 * This file is released under the GPL.
  14 */
  15
  16/*
  17 * This virtual memory filesystem is heavily based on the ramfs. It
  18 * extends ramfs by the ability to use swap and honor resource limits
  19 * which makes it a completely usable filesystem.
  20 */
  21
  22#include <linux/config.h>
  23#include <linux/module.h>
  24#include <linux/init.h>
  25#include <linux/devfs_fs_kernel.h>
  26#include <linux/fs.h>
  27#include <linux/mm.h>
  28#include <linux/mman.h>
  29#include <linux/file.h>
  30#include <linux/swap.h>
  31#include <linux/pagemap.h>
  32#include <linux/string.h>
  33#include <linux/slab.h>
  34#include <linux/backing-dev.h>
  35#include <linux/shmem_fs.h>
  36#include <linux/mount.h>
  37#include <linux/writeback.h>
  38#include <linux/vfs.h>
  39#include <linux/blkdev.h>
  40#include <linux/security.h>
  41#include <linux/swapops.h>
  42#include <linux/mempolicy.h>
  43#include <linux/namei.h>
  44#include <asm/uaccess.h>
  45#include <asm/div64.h>
  46#include <asm/pgtable.h>
  47
  48/* This magic number is used in glibc for posix shared memory */
  49#define TMPFS_MAGIC     0x01021994
  50
    /*
     * Geometry of the swap-vector index pages.
     * ENTRIES_PER_PAGE:     unsigned-long-sized slots in one page-cache page.
     * ENTRIES_PER_PAGEPAGE: slots reachable through one page of pointers
     *                       to such pages.
     * BLOCKS_PER_PAGE:      page-cache page size in 512-byte units; this is
     *                       the step by which i_blocks is adjusted below.
     */
  51#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
  52#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
  53#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
  54
    /*
     * SHMEM_MAX_INDEX is the first page index that cannot be represented:
     * the direct entries, plus ENTRIES_PER_PAGE/2 doubly indirect slots of
     * ENTRIES_PER_PAGE entries each, plus ENTRIES_PER_PAGE/2 triply
     * indirect slots of ENTRIES_PER_PAGEPAGE entries each.
     * SHMEM_MAX_BYTES is the same limit expressed in bytes.
     */
  55#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
  56#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
  57
    /*
     * Convert a byte size into a count of PAGE_SIZE pages for the
     * vm accounting calls (PAGE_CACHE_ALIGN presumably rounds up to a
     * page-cache page boundary -- confirm against pagemap.h).
     */
  58#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
  59
  60/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
  61#define SHMEM_PAGEIN     VM_READ
  62#define SHMEM_TRUNCATE   VM_WRITE
  63
  64/* Pretend that each entry is of this size in directory's i_size */
  65#define BOGO_DIRENT_SIZE 20
  66
  67/* Keep swapped page count in private field of indirect struct page */
  68#define nr_swapped              private
  69
  70/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
  71enum sgp_type {
            /*
             * shmem_swp_alloc treats SGP_WRITE as the only mode that may
             * look beyond i_size, and for SGP_READ it hands back a mapping
             * of the zero page instead of allocating a missing index page.
             */
  72        SGP_QUICK,      /* don't try more than file page cache lookup */
  73        SGP_READ,       /* don't exceed i_size, don't allocate page */
  74        SGP_CACHE,      /* don't exceed i_size, may allocate page */
  75        SGP_WRITE,      /* may exceed i_size, may allocate page */
  76};
  77
  78static int shmem_getpage(struct inode *inode, unsigned long idx,
  79                         struct page **pagep, enum sgp_type sgp, int *type);
    /*
     * Forward declaration only: shmem_notify_change below calls
     * shmem_getpage before its definition appears in the file.
     */
  80
  81static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
  82{
  83        /*
  84         * The above definition of ENTRIES_PER_PAGE, and the use of
  85         * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
  86         * might be reconsidered if it ever diverges from PAGE_SIZE.
  87         */
  88        return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
  89}
  90
  91static inline void shmem_dir_free(struct page *page)
  92{
  93        __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
  94}
  95
  96static struct page **shmem_dir_map(struct page *page)
  97{
  98        return (struct page **)kmap_atomic(page, KM_USER0);
  99}
 100
 101static inline void shmem_dir_unmap(struct page **dir)
 102{
 103        kunmap_atomic(dir, KM_USER0);
 104}
 105
 106static swp_entry_t *shmem_swp_map(struct page *page)
 107{
 108        return (swp_entry_t *)kmap_atomic(page, KM_USER1);
 109}
 110
 111static inline void shmem_swp_balance_unmap(void)
 112{
 113        /*
 114         * When passing a pointer to an i_direct entry, to code which
 115         * also handles indirect entries and so will shmem_swp_unmap,
 116         * we must arrange for the preempt count to remain in balance.
 117         * What kmap_atomic of a lowmem page does depends on config
 118         * and architecture, so pretend to kmap_atomic some lowmem page.
 119         */
 120        (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
 121}
 122
 123static inline void shmem_swp_unmap(swp_entry_t *entry)
 124{
 125        kunmap_atomic(entry, KM_USER1);
 126}
 127
 128static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
 129{
 130        return sb->s_fs_info;
 131}
 132
 133/*
 134 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 135 * for shared memory and for shared anonymous (/dev/zero) mappings
 136 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 137 * consistent with the pre-accounting of private mappings ...
 138 */
 139static inline int shmem_acct_size(unsigned long flags, loff_t size)
 140{
 141        return (flags & VM_ACCOUNT)?
 142                security_vm_enough_memory(VM_ACCT(size)): 0;
 143}
 144
 145static inline void shmem_unacct_size(unsigned long flags, loff_t size)
 146{
 147        if (flags & VM_ACCOUNT)
 148                vm_unacct_memory(VM_ACCT(size));
 149}
 150
 151/*
 152 * ... whereas tmpfs objects are accounted incrementally as
 153 * pages are allocated, in order to allow huge sparse files.
 154 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 155 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 156 */
 157static inline int shmem_acct_block(unsigned long flags)
 158{
 159        return (flags & VM_ACCOUNT)?
 160                0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
 161}
 162
 163static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 164{
 165        if (!(flags & VM_ACCOUNT))
 166                vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
 167}
 168
 169static struct super_operations shmem_ops;
    /*
     * The operation tables here are only declared at this point; their
     * initializers are not in this part of the file (presumably they
     * follow the functions they reference -- confirm further down).
     */
 170static struct address_space_operations shmem_aops;
 171static struct file_operations shmem_file_operations;
 172static struct inode_operations shmem_inode_operations;
 173static struct inode_operations shmem_dir_inode_operations;
 174static struct vm_operations_struct shmem_vm_ops;
 175
    /* Backing device description used for shmem mappings. */
 176static struct backing_dev_info shmem_backing_dev_info = {
 177        .ra_pages       = 0,    /* No readahead */
 178        .memory_backed  = 1,    /* Does not contribute to dirty memory */
 179        .unplug_io_fn = default_unplug_io_fn,
 180};
 181
    /*
     * List of shmem inodes, walked by shmem_unuse; shmem_ilock is taken
     * around that walk and around the list_del in shmem_delete_inode.
     */
 182LIST_HEAD(shmem_inodes);
 183static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
 184
 185static void shmem_free_block(struct inode *inode)
 186{
 187        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 188        spin_lock(&sbinfo->stat_lock);
 189        sbinfo->free_blocks++;
 190        inode->i_blocks -= BLOCKS_PER_PAGE;
 191        spin_unlock(&sbinfo->stat_lock);
 192}
 193
 194/*
 195 * shmem_recalc_inode - recalculate the size of an inode
 196 *
 197 * @inode: inode to recalc
 198 *
 199 * We have to calculate the free blocks since the mm can drop
 200 * undirtied hole pages behind our back.
 201 *
 202 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 203 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 204 *
 205 * It has to be called with the spinlock held.
 206 */
 207static void shmem_recalc_inode(struct inode *inode)
 208{
 209        struct shmem_inode_info *info = SHMEM_I(inode);
 210        long freed;
 211
 212        freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 213        if (freed > 0) {
 214                struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 215                info->alloced -= freed;
 216                spin_lock(&sbinfo->stat_lock);
 217                sbinfo->free_blocks += freed;
 218                inode->i_blocks -= freed*BLOCKS_PER_PAGE;
 219                spin_unlock(&sbinfo->stat_lock);
 220                shmem_unacct_blocks(info->flags, freed);
 221        }
 222}
 223
 224/*
 225 * shmem_swp_entry - find the swap vector position in the info structure
 226 *
 227 * @info:  info structure for the inode
 228 * @index: index of the page to find
 229 * @page:  optional page to add to the structure. Has to be preset to
 230 *         all zeros
 231 *
 232 * If there is no space allocated yet it will return NULL when
 233 * page is NULL, else it will use the page for the needed block,
 234 * setting it to NULL on return to indicate that it has been used.
 235 *
 236 * The swap vector is organized the following way:
 237 *
 238 * There are SHMEM_NR_DIRECT entries directly stored in the
 239 * shmem_inode_info structure. So small files do not need an additional
 240 * allocation.
 241 *
 242 * For pages with index >= SHMEM_NR_DIRECT there is the pointer
 243 * i_indirect which points to a page which holds in the first half
 244 * doubly indirect blocks, in the second half triple indirect blocks:
 245 *
 246 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 247 * following layout (for SHMEM_NR_DIRECT == 16):
 248 *
 249 * i_indirect -> dir --> 16-19
 250 *            |      +-> 20-23
 251 *            |
 252 *            +-->dir2 --> 24-27
 253 *            |        +-> 28-31
 254 *            |        +-> 32-35
 255 *            |        +-> 36-39
 256 *            |
 257 *            +-->dir3 --> 40-43
 258 *                     +-> 44-47
 259 *                     +-> 48-51
 260 *                     +-> 52-55
 261 */
 262static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
 263{
 264        unsigned long offset;
 265        struct page **dir;
 266        struct page *subdir;
 267
            /*
             * Direct entries need no mapping, but the caller will
             * shmem_swp_unmap whatever we return, so take a dummy
             * mapping first to keep the kmap count balanced.
             */
 268        if (index < SHMEM_NR_DIRECT) {
 269                shmem_swp_balance_unmap();
 270                return info->i_direct+index;
 271        }
            /*
             * No top-level index page yet: adopt the caller's spare page
             * if one was offered (clearing *page to show it was used).
             * Either way return NULL, as this call installs at most one
             * page and further levels are still missing.
             */
 272        if (!info->i_indirect) {
 273                if (page) {
 274                        info->i_indirect = *page;
 275                        *page = NULL;
 276                }
 277                return NULL;                    /* need another page */
 278        }
 279
            /*
             * From here on, offset is the slot inside the leaf page and
             * index selects the leaf page among the indirect ones.
             */
 280        index -= SHMEM_NR_DIRECT;
 281        offset = index % ENTRIES_PER_PAGE;
 282        index /= ENTRIES_PER_PAGE;
 283        dir = shmem_dir_map(info->i_indirect);
 284
            /*
             * Second half of the top-level page: one extra level of
             * directory sits between it and the leaf page.
             */
 285        if (index >= ENTRIES_PER_PAGE/2) {
 286                index -= ENTRIES_PER_PAGE/2;
 287                dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
 288                index %= ENTRIES_PER_PAGE;
 289                subdir = *dir;
 290                if (!subdir) {
 291                        if (page) {
 292                                *dir = *page;
 293                                *page = NULL;
 294                        }
 295                        shmem_dir_unmap(dir);
 296                        return NULL;            /* need another page */
 297                }
                    /* Swap the KM_USER0 mapping over to the intermediate directory. */
 298                shmem_dir_unmap(dir);
 299                dir = shmem_dir_map(subdir);
 300        }
 301
            /* dir now points at the slot holding the leaf swap-entry page. */
 302        dir += index;
 303        subdir = *dir;
 304        if (!subdir) {
                    /* Leaf missing: use the spare page if there is one, else give up. */
 305                if (!page || !(subdir = *page)) {
 306                        shmem_dir_unmap(dir);
 307                        return NULL;            /* need a page */
 308                }
 309                *dir = subdir;
 310                *page = NULL;
 311        }
 312        shmem_dir_unmap(dir);
            /* Returned pointer is a KM_USER1 mapping: caller must shmem_swp_unmap it. */
 313        return shmem_swp_map(subdir) + offset;
 314}
 315
 316static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
 317{
 318        long incdec = value? 1: -1;
 319
 320        entry->val = value;
 321        info->swapped += incdec;
 322        if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
 323                kmap_atomic_to_page(entry)->nr_swapped += incdec;
 324}
 325
 326/*
 327 * shmem_swp_alloc - get the position of the swap entry for the page.
 328 *                   If it does not exist allocate the entry.
 329 *
 330 * @info:       info structure for the inode
 331 * @index:      index of the page to find
 332 * @sgp:        check and recheck i_size? skip allocation?
 333 */
 334static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
 335{
            /*
             * Returns a mapped swap entry pointer (to be released with
             * shmem_swp_unmap) or an ERR_PTR: -EINVAL when index lies at
             * or beyond i_size and sgp is not SGP_WRITE, -ENOSPC when the
             * filesystem has no blocks to spare, -ENOMEM when an index
             * page cannot be allocated.  info->lock is dropped and retaken
             * around the allocation, so it must be held on entry.
             */
 336        struct inode *inode = &info->vfs_inode;
 337        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 338        struct page *page = NULL;
 339        swp_entry_t *entry;
 340
 341        if (sgp != SGP_WRITE &&
 342            ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 343                return ERR_PTR(-EINVAL);
 344
            /*
             * Each pass offers shmem_swp_entry the spare page (if any);
             * it takes the page for the first missing level and returns
             * NULL until the whole path to the entry exists.
             */
 345        while (!(entry = shmem_swp_entry(info, index, &page))) {
                    /* Readers never allocate: hand back an all-zero entry instead. */
 346                if (sgp == SGP_READ)
 347                        return shmem_swp_map(ZERO_PAGE(0));
 348                /*
 349                 * Test free_blocks against 1 not 0, since we have 1 data
 350                 * page (and perhaps indirect index pages) yet to allocate:
 351                 * a waste to allocate index if we cannot allocate data.
 352                 */
 353                spin_lock(&sbinfo->stat_lock);
 354                if (sbinfo->free_blocks <= 1) {
 355                        spin_unlock(&sbinfo->stat_lock);
 356                        return ERR_PTR(-ENOSPC);
 357                }
 358                sbinfo->free_blocks--;
 359                inode->i_blocks += BLOCKS_PER_PAGE;
 360                spin_unlock(&sbinfo->stat_lock);
 361
                    /* Allocate and zero the index page with info->lock released. */
 362                spin_unlock(&info->lock);
 363                page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
 364                if (page) {
 365                        clear_highpage(page);
 366                        page->nr_swapped = 0;
 367                }
 368                spin_lock(&info->lock);
 369
 370                if (!page) {
                            /* Undo the block reservation made above. */
 371                        shmem_free_block(inode);
 372                        return ERR_PTR(-ENOMEM);
 373                }
                    /* i_size may have changed while the lock was dropped: recheck. */
 374                if (sgp != SGP_WRITE &&
 375                    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 376                        entry = ERR_PTR(-EINVAL);
 377                        break;
 378                }
 379                if (info->next_index <= index)
 380                        info->next_index = index + 1;
 381        }
 382        if (page) {
 383                /* another task gave its page, or truncated the file */
 384                shmem_free_block(inode);
 385                shmem_dir_free(page);
 386        }
            /* Record that entries up to and including index may now be in use. */
 387        if (info->next_index <= index && !IS_ERR(entry))
 388                info->next_index = index + 1;
 389        return entry;
 390}
 391
 392/*
 393 * shmem_free_swp - free some swap entries in a directory
 394 *
 395 * @dir:   pointer to the directory
 396 * @edir:  pointer after last entry of the directory
 397 */
 398static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
 399{
 400        swp_entry_t *ptr;
 401        int freed = 0;
 402
 403        for (ptr = dir; ptr < edir; ptr++) {
 404                if (ptr->val) {
 405                        free_swap_and_cache(*ptr);
 406                        *ptr = (swp_entry_t){0};
 407                        freed++;
 408                }
 409        }
 410        return freed;
 411}
 412
 413static void shmem_truncate(struct inode *inode)
 414{
            /*
             * Free the swap entries and index pages that lie beyond the
             * inode's current i_size, then settle the block accounting.
             * Takes and releases info->lock itself.
             */
 415        struct shmem_inode_info *info = SHMEM_I(inode);
 416        unsigned long idx;
 417        unsigned long size;
 418        unsigned long limit;
 419        unsigned long stage;
 420        struct page **dir;
 421        struct page *subdir;
 422        struct page *empty;
 423        swp_entry_t *ptr;
 424        int offset;
 425        int freed;
 426
 427        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
            /* idx: first page index wholly beyond i_size (size rounded up to pages). */
 428        idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
            /* Nothing recorded at or above idx: no swap vector work to do. */
 429        if (idx >= info->next_index)
 430                return;
 431
 432        spin_lock(&info->lock);
            /* Mark truncation in progress (shmem_writepage checks this flag). */
 433        info->flags |= SHMEM_TRUNCATE;
            /* limit: old upper bound of entries in use; next_index shrinks to idx. */
 434        limit = info->next_index;
 435        info->next_index = idx;
            /* Release direct entries in [idx, min(limit, SHMEM_NR_DIRECT)). */
 436        if (info->swapped && idx < SHMEM_NR_DIRECT) {
 437                ptr = info->i_direct;
 438                size = limit;
 439                if (size > SHMEM_NR_DIRECT)
 440                        size = SHMEM_NR_DIRECT;
 441                info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
 442        }
 443        if (!info->i_indirect)
 444                goto done2;
 445
            /*
             * From here idx and limit are relative to the first indirect
             * entry.  offset is the number of entries to keep in the first
             * affected leaf page; idx is rounded down to that leaf's start.
             */
 446        BUG_ON(limit <= SHMEM_NR_DIRECT);
 447        limit -= SHMEM_NR_DIRECT;
 448        idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
 449        offset = idx % ENTRIES_PER_PAGE;
 450        idx -= offset;
 451
            /*
             * empty: an intermediate directory page already unhooked from
             * the top-level page, to be freed once the walk has left it.
             * stage: relative index at which the walk must move on to the
             * next intermediate directory.
             */
 452        empty = NULL;
 453        dir = shmem_dir_map(info->i_indirect);
 454        stage = ENTRIES_PER_PAGEPAGE/2;
 455        if (idx < ENTRIES_PER_PAGEPAGE/2)
 456                dir += idx/ENTRIES_PER_PAGE;
 457        else {
                    /* Start lies in the second half: position on its intermediate directory. */
 458                dir += ENTRIES_PER_PAGE/2;
 459                dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
 460                while (stage <= idx)
 461                        stage += ENTRIES_PER_PAGEPAGE;
 462                if (*dir) {
 463                        subdir = *dir;
                            /* size: slot of the starting leaf within this directory. */
 464                        size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
 465                                ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
                            /* Truncating from the directory's very first entry: it can go entirely. */
 466                        if (!size && !offset) {
 467                                empty = subdir;
 468                                *dir = NULL;
 469                        }
 470                        shmem_dir_unmap(dir);
 471                        dir = shmem_dir_map(subdir) + size;
 472                } else {
                            /* No directory here: skip ahead to the next stage boundary. */
 473                        offset = 0;
 474                        idx = stage;
 475                }
 476        }
 477
            /* Each iteration deals with one leaf page of swap entries. */
 478        for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
 479                if (unlikely(idx == stage)) {
                            /* Crossed into the next intermediate directory: remap. */
 480                        shmem_dir_unmap(dir-1);
 481                        dir = shmem_dir_map(info->i_indirect) +
 482                            ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
                            /* Skip over directories that were never allocated. */
 483                        while (!*dir) {
 484                                dir++;
 485                                idx += ENTRIES_PER_PAGEPAGE;
 486                                if (idx >= limit)
 487                                        goto done1;
 488                        }
 489                        stage = idx + ENTRIES_PER_PAGEPAGE;
 490                        subdir = *dir;
 491                        *dir = NULL;
 492                        shmem_dir_unmap(dir);
                            /* The previously detached directory is finished with: free it. */
 493                        if (empty) {
 494                                shmem_dir_free(empty);
 495                                shmem_free_block(inode);
 496                        }
 497                        empty = subdir;
                            /* Nothing is kmapped at this point, so rescheduling is allowed. */
 498                        cond_resched_lock(&info->lock);
 499                        dir = shmem_dir_map(subdir);
 500                }
 501                subdir = *dir;
 502                if (subdir && subdir->nr_swapped) {
 503                        ptr = shmem_swp_map(subdir);
 504                        size = limit - idx;
 505                        if (size > ENTRIES_PER_PAGE)
 506                                size = ENTRIES_PER_PAGE;
 507                        freed = shmem_free_swp(ptr+offset, ptr+size);
 508                        shmem_swp_unmap(ptr);
 509                        info->swapped -= freed;
 510                        subdir->nr_swapped -= freed;
                            /* At most the kept entries (below offset) may still be swapped. */
 511                        BUG_ON(subdir->nr_swapped > offset);
 512                }
                    /*
                     * A nonzero offset means this first leaf is only partly
                     * truncated and must stay; every later leaf is freed.
                     */
 513                if (offset)
 514                        offset = 0;
 515                else if (subdir) {
 516                        *dir = NULL;
 517                        shmem_dir_free(subdir);
 518                        shmem_free_block(inode);
 519                }
 520        }
 521done1:
            /* dir was advanced past the last slot visited: unmap via dir-1. */
 522        shmem_dir_unmap(dir-1);
 523        if (empty) {
 524                shmem_dir_free(empty);
 525                shmem_free_block(inode);
 526        }
            /* No indirect entries remain in use: drop the top-level index page too. */
 527        if (info->next_index <= SHMEM_NR_DIRECT) {
 528                shmem_dir_free(info->i_indirect);
 529                info->i_indirect = NULL;
 530                shmem_free_block(inode);
 531        }
 532done2:
 533        BUG_ON(info->swapped > info->next_index);
 534        if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
 535                /*
 536                 * Call truncate_inode_pages again: racing shmem_unuse_inode
 537                 * may have swizzled a page in from swap since vmtruncate or
 538                 * generic_delete_inode did it, before we lowered next_index.
 539                 * Also, though shmem_getpage checks i_size before adding to
 540                 * cache, no recheck after: so fix the narrow window there too.
 541                 */
 542                spin_unlock(&info->lock);
 543                truncate_inode_pages(inode->i_mapping, inode->i_size);
 544                spin_lock(&info->lock);
 545        }
 546        info->flags &= ~SHMEM_TRUNCATE;
            /* Settle block accounting for pages that left the cache. */
 547        shmem_recalc_inode(inode);
 548        spin_unlock(&info->lock);
 549}
 550
 551static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 552{
 553        struct inode *inode = dentry->d_inode;
 554        struct page *page = NULL;
 555        int error;
 556
 557        if (attr->ia_valid & ATTR_SIZE) {
 558                if (attr->ia_size < inode->i_size) {
 559                        /*
 560                         * If truncating down to a partial page, then
 561                         * if that page is already allocated, hold it
 562                         * in memory until the truncation is over, so
 563                         * truncate_partial_page cannnot miss it were
 564                         * it assigned to swap.
 565                         */
 566                        if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
 567                                (void) shmem_getpage(inode,
 568                                        attr->ia_size>>PAGE_CACHE_SHIFT,
 569                                                &page, SGP_READ, NULL);
 570                        }
 571                        /*
 572                         * Reset SHMEM_PAGEIN flag so that shmem_truncate can
 573                         * detect if any pages might have been added to cache
 574                         * after truncate_inode_pages.  But we needn't bother
 575                         * if it's being fully truncated to zero-length: the
 576                         * nrpages check is efficient enough in that case.
 577                         */
 578                        if (attr->ia_size) {
 579                                struct shmem_inode_info *info = SHMEM_I(inode);
 580                                spin_lock(&info->lock);
 581                                info->flags &= ~SHMEM_PAGEIN;
 582                                spin_unlock(&info->lock);
 583                        }
 584                }
 585        }
 586
 587        error = inode_change_ok(inode, attr);
 588        if (!error)
 589                error = inode_setattr(inode, attr);
 590        if (page)
 591                page_cache_release(page);
 592        return error;
 593}
 594
 595static void shmem_delete_inode(struct inode *inode)
 596{
 597        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 598        struct shmem_inode_info *info = SHMEM_I(inode);
 599
 600        if (inode->i_op->truncate == shmem_truncate) {
 601                spin_lock(&shmem_ilock);
 602                list_del(&info->list);
 603                spin_unlock(&shmem_ilock);
 604                shmem_unacct_size(info->flags, inode->i_size);
 605                inode->i_size = 0;
 606                shmem_truncate(inode);
 607        }
 608        BUG_ON(inode->i_blocks);
 609        spin_lock(&sbinfo->stat_lock);
 610        sbinfo->free_inodes++;
 611        spin_unlock(&sbinfo->stat_lock);
 612        clear_inode(inode);
 613}
 614
 615static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
 616{
 617        swp_entry_t *ptr;
 618
 619        for (ptr = dir; ptr < edir; ptr++) {
 620                if (ptr->val == entry.val)
 621                        return ptr - dir;
 622        }
 623        return -1;
 624}
 625
 626static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 627{
            /*
             * Search this inode's swap vector for entry.  If found, try to
             * move page from the swap cache into the inode's mapping at the
             * matching index, and drop one reference on the swap entry.
             * Returns 1 when the entry was found here (even if the move
             * failed), 0 otherwise.  Takes and releases info->lock.
             */
 628        struct inode *inode;
 629        unsigned long idx;
 630        unsigned long size;
 631        unsigned long limit;
 632        unsigned long stage;
 633        struct page **dir;
 634        struct page *subdir;
 635        swp_entry_t *ptr;
 636        int offset;
 637
 638        idx = 0;
 639        ptr = info->i_direct;
 640        spin_lock(&info->lock);
            /* Only entries below next_index can be in use. */
 641        limit = info->next_index;
 642        size = limit;
 643        if (size > SHMEM_NR_DIRECT)
 644                size = SHMEM_NR_DIRECT;
            /* First look through the direct entries. */
 645        offset = shmem_find_swp(entry, ptr, ptr+size);
 646        if (offset >= 0) {
                    /* "found" ends with shmem_swp_unmap: balance it for a direct entry. */
 647                shmem_swp_balance_unmap();
 648                goto found;
 649        }
 650        if (!info->i_indirect)
 651                goto lost2;
 652        /* we might be racing with shmem_truncate */
 653        if (limit <= SHMEM_NR_DIRECT)
 654                goto lost2;
 655
            /*
             * Walk the indirect leaf pages.  Here idx is an absolute page
             * index (it starts at SHMEM_NR_DIRECT); stage is the index at
             * which the walk must move on to the next intermediate
             * directory.
             */
 656        dir = shmem_dir_map(info->i_indirect);
 657        stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
 658
 659        for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
 660                if (unlikely(idx == stage)) {
 661                        shmem_dir_unmap(dir-1);
 662                        dir = shmem_dir_map(info->i_indirect) +
 663                            ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
                            /* Skip over directories that were never allocated. */
 664                        while (!*dir) {
 665                                dir++;
 666                                idx += ENTRIES_PER_PAGEPAGE;
 667                                if (idx >= limit)
 668                                        goto lost1;
 669                        }
 670                        stage = idx + ENTRIES_PER_PAGEPAGE;
 671                        subdir = *dir;
 672                        shmem_dir_unmap(dir);
 673                        dir = shmem_dir_map(subdir);
 674                }
 675                subdir = *dir;
                    /* Leaf pages with no swapped entries cannot hold the match. */
 676                if (subdir && subdir->nr_swapped) {
 677                        ptr = shmem_swp_map(subdir);
 678                        size = limit - idx;
 679                        if (size > ENTRIES_PER_PAGE)
 680                                size = ENTRIES_PER_PAGE;
 681                        offset = shmem_find_swp(entry, ptr, ptr+size);
 682                        if (offset >= 0) {
                                    /* Keep the leaf (ptr) mapped for "found"; drop only dir. */
 683                                shmem_dir_unmap(dir);
 684                                goto found;
 685                        }
 686                        shmem_swp_unmap(ptr);
 687                }
 688        }
 689lost1:
            /* dir was advanced past the last slot visited: unmap via dir-1. */
 690        shmem_dir_unmap(dir-1);
 691lost2:
 692        spin_unlock(&info->lock);
 693        return 0;
 694found:
            /* idx is the first index covered by ptr; add the slot to get the page index. */
 695        idx += offset;
 696        inode = &info->vfs_inode;
 697        if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
                    /* Page now lives in the mapping: note the pagein and clear the entry. */
 698                info->flags |= SHMEM_PAGEIN;
 699                shmem_swp_set(info, ptr + offset, 0);
 700        }
 701        shmem_swp_unmap(ptr);
 702        spin_unlock(&info->lock);
 703        /*
 704         * Decrement swap count even when the entry is left behind:
 705         * try_to_unuse will skip over mms, then reincrement count.
 706         */
 707        swap_free(entry);
 708        return 1;
 709}
 710
 711/*
 712 * shmem_unuse() searches for a possibly swapped-out shmem page.
 713 */
 714int shmem_unuse(swp_entry_t entry, struct page *page)
 715{
 716        struct list_head *p;
 717        struct shmem_inode_info *info;
 718        int found = 0;
 719
 720        spin_lock(&shmem_ilock);
 721        list_for_each(p, &shmem_inodes) {
 722                info = list_entry(p, struct shmem_inode_info, list);
 723
 724                if (info->swapped && shmem_unuse_inode(info, entry, page)) {
 725                        /* move head to start search for next from here */
 726                        list_move_tail(&shmem_inodes, &info->list);
 727                        found = 1;
 728                        break;
 729                }
 730        }
 731        spin_unlock(&shmem_ilock);
 732        return found;
 733}
 734
 735/*
 736 * Move the page from the page cache to the swap cache.
 737 */
 738static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 739{
 740        struct shmem_inode_info *info;
 741        swp_entry_t *entry, swap;
 742        struct address_space *mapping;
 743        unsigned long index;
 744        struct inode *inode;
 745
 746        BUG_ON(!PageLocked(page));
 747        BUG_ON(page_mapped(page));
 748
 749        mapping = page->mapping;
 750        index = page->index;
 751        inode = mapping->host;
 752        info = SHMEM_I(inode);
 753        if (info->flags & VM_LOCKED)
 754                goto redirty;
 755        swap = get_swap_page();
 756        if (!swap.val)
 757                goto redirty;
 758
 759        spin_lock(&info->lock);
 760        shmem_recalc_inode(inode);
 761        if (index >= info->next_index) {
 762                BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 763                goto unlock;
 764        }
 765        entry = shmem_swp_entry(info, index, NULL);
 766        BUG_ON(!entry);
 767        BUG_ON(entry->val);
 768
 769        if (move_to_swap_cache(page, swap) == 0) {
 770                shmem_swp_set(info, entry, swap.val);
 771                shmem_swp_unmap(entry);
 772                spin_unlock(&info->lock);
 773                unlock_page(page);
 774                return 0;
 775        }
 776
 777        shmem_swp_unmap(entry);
 778unlock:
 779        spin_unlock(&info->lock);
 780        swap_free(swap);
 781redirty:
 782        set_page_dirty(page);
 783        return WRITEPAGE_ACTIVATE;      /* Return with the page locked */
 784}
 785
 786#ifdef CONFIG_NUMA
 787static struct page *shmem_swapin_async(struct shared_policy *p,
 788                                       swp_entry_t entry, unsigned long idx)
 789{
 790        struct page *page;
 791        struct vm_area_struct pvma;
 792
 793        /* Create a pseudo vma that just contains the policy */
 794        memset(&pvma, 0, sizeof(struct vm_area_struct));
 795        pvma.vm_end = PAGE_SIZE;
 796        pvma.vm_pgoff = idx;
 797        pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
 798        page = read_swap_cache_async(entry, &pvma, 0);
 799        mpol_free(pvma.vm_policy);
 800        return page;
 801}
 802
 803struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
 804                          unsigned long idx)
 805{
 806        struct shared_policy *p = &info->policy;
 807        int i, num;
 808        struct page *page;
 809        unsigned long offset;
 810
 811        num = valid_swaphandles(entry, &offset);
 812        for (i = 0; i < num; offset++, i++) {
 813                page = shmem_swapin_async(p,
 814                                swp_entry(swp_type(entry), offset), idx);
 815                if (!page)
 816                        break;
 817                page_cache_release(page);
 818        }
 819        lru_add_drain();        /* Push any new pages onto the LRU now */
 820        return shmem_swapin_async(p, entry, idx);
 821}
 822
 823static struct page *
 824shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info,
 825                 unsigned long idx)
 826{
 827        struct vm_area_struct pvma;
 828        struct page *page;
 829
 830        memset(&pvma, 0, sizeof(struct vm_area_struct));
 831        pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
 832        pvma.vm_pgoff = idx;
 833        pvma.vm_end = PAGE_SIZE;
 834        page = alloc_page_vma(gfp, &pvma, 0);
 835        mpol_free(pvma.vm_policy);
 836        return page;
 837}
 838#else
 839static inline struct page *
 840shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 841{
 842        swapin_readahead(entry, 0, NULL);
 843        return read_swap_cache_async(entry, NULL, 0);
 844}
 845
 /*
  * !CONFIG_NUMA variant: a plain alloc_page() with the given gfp mask.
  * info and idx are unused here.
  */
static inline struct page *
shmem_alloc_page(unsigned long gfp,struct shmem_inode_info *info,
                                 unsigned long idx)
{
        struct page *fresh = alloc_page(gfp);

        return fresh;
}
 852#endif
 853
 854/*
 855 * shmem_getpage - either get the page from swap or allocate a new one
 856 *
 857 * If we allocate a new one we do not mark it dirty. That's up to the
 858 * vm. If we swap it in we mark it dirty since we also free the swap
 859 * entry since a page cannot live in both the swap and page cache
     *
     * @inode: inode whose page is wanted
     * @idx:   page index; idx >= SHMEM_MAX_INDEX fails with -EFBIG
     * @pagep: in: NULL, or a page supplied by the caller (see the comment
     *         at the top of the body).  out: if it was NULL on entry it is
     *         set to the page found (unlocked before returning), or to
     *         ZERO_PAGE(0) when the SGP_READ path found no page at all.
     * @sgp:   SGP_QUICK gives up (returning 0, *pagep untouched, *type not
     *         written) unless an uptodate page is found straight away;
     *         SGP_READ does not allocate a page when there is no swap
     *         entry; the value is also passed on to shmem_swp_alloc().
     * @type:  if non-NULL, receives VM_FAULT_MINOR, or VM_FAULT_MAJOR once
     *         shmem_swapin() had to be called.
     *
     * Returns 0 on success, else a negative errno: -EFBIG, -EIO (swap page
     * not uptodate), -ENOSPC (no free blocks or shmem_acct_block() refused),
     * -ENOMEM, or whatever shmem_swp_alloc() reported.
 860 */
 861static int shmem_getpage(struct inode *inode, unsigned long idx,
 862                        struct page **pagep, enum sgp_type sgp, int *type)
 863{
 864        struct address_space *mapping = inode->i_mapping;
 865        struct shmem_inode_info *info = SHMEM_I(inode);
 866        struct shmem_sb_info *sbinfo;
 867        struct page *filepage = *pagep;
 868        struct page *swappage;
 869        swp_entry_t *entry;
 870        swp_entry_t swap;
 871        int error, majmin = VM_FAULT_MINOR;
 872
 873        if (idx >= SHMEM_MAX_INDEX)
 874                return -EFBIG;
 875        /*
 876         * Normally, filepage is NULL on entry, and either found
 877         * uptodate immediately, or allocated and zeroed, or read
 878         * in under swappage, which is then assigned to filepage.
 879         * But shmem_prepare_write passes in a locked filepage,
 880         * which may be found not uptodate by other callers too,
 881         * and may need to be copied from the swappage read in.
 882         */
 883repeat:
 884        if (!filepage)
 885                filepage = find_lock_page(mapping, idx);
 886        if (filepage && PageUptodate(filepage))
 887                goto done;
 888        error = 0;
            /*
             * SGP_QUICK goes no further: leave through the failed path
             * with error == 0, so *pagep stays as the caller passed it.
             */
 889        if (sgp == SGP_QUICK)
 890                goto failed;
 891
 892        spin_lock(&info->lock);
 893        shmem_recalc_inode(inode);
 894        entry = shmem_swp_alloc(info, idx, sgp);
 895        if (IS_ERR(entry)) {
 896                spin_unlock(&info->lock);
 897                error = PTR_ERR(entry);
 898                goto failed;
 899        }
            /* Snapshot the swap entry while info->lock is held */
 900        swap = *entry;
 901
 902        if (swap.val) {
 903                /* Look it up and read it in.. */
 904                swappage = lookup_swap_cache(swap);
 905                if (!swappage) {
 906                        shmem_swp_unmap(entry);
 907                        spin_unlock(&info->lock);
 908                        /* here we actually do the io */
 909                        if (majmin == VM_FAULT_MINOR && type)
 910                                inc_page_state(pgmajfault);
 911                        majmin = VM_FAULT_MAJOR;
 912                        swappage = shmem_swapin(info, swap, idx);
 913                        if (!swappage) {
                                    /*
                                     * Swapin gave no page.  Re-look-up the
                                     * entry: only if it still holds the same
                                     * swap value is this reported as -ENOMEM;
                                     * if it changed while unlocked, retry.
                                     */
 914                                spin_lock(&info->lock);
 915                                entry = shmem_swp_alloc(info, idx, sgp);
 916                                if (IS_ERR(entry))
 917                                        error = PTR_ERR(entry);
 918                                else {
 919                                        if (entry->val == swap.val)
 920                                                error = -ENOMEM;
 921                                        shmem_swp_unmap(entry);
 922                                }
 923                                spin_unlock(&info->lock);
 924                                if (error)
 925                                        goto failed;
 926                                goto repeat;
 927                        }
                            /*
                             * Wait until the page is unlocked, drop our
                             * reference and start over from the top.
                             */
 928                        wait_on_page_locked(swappage);
 929                        page_cache_release(swappage);
 930                        goto repeat;
 931                }
 932
 933                /* We have to do this with page locked to prevent races */
 934                if (TestSetPageLocked(swappage)) {
 935                        shmem_swp_unmap(entry);
 936                        spin_unlock(&info->lock);
 937                        wait_on_page_locked(swappage);
 938                        page_cache_release(swappage);
 939                        goto repeat;
 940                }
 941                if (PageWriteback(swappage)) {
 942                        shmem_swp_unmap(entry);
 943                        spin_unlock(&info->lock);
 944                        wait_on_page_writeback(swappage);
 945                        unlock_page(swappage);
 946                        page_cache_release(swappage);
 947                        goto repeat;
 948                }
 949                if (!PageUptodate(swappage)) {
 950                        shmem_swp_unmap(entry);
 951                        spin_unlock(&info->lock);
 952                        unlock_page(swappage);
 953                        page_cache_release(swappage);
 954                        error = -EIO;
 955                        goto failed;
 956                }
 957
 958                if (filepage) {
                            /*
                             * A filepage already exists: clear the swap
                             * entry, drop swappage from the swap cache and
                             * copy its contents into filepage, which is
                             * then marked uptodate and dirty.
                             */
 959                        shmem_swp_set(info, entry, 0);
 960                        shmem_swp_unmap(entry);
 961                        delete_from_swap_cache(swappage);
 962                        spin_unlock(&info->lock);
 963                        copy_highpage(filepage, swappage);
 964                        unlock_page(swappage);
 965                        page_cache_release(swappage);
 966                        flush_dcache_page(filepage);
 967                        SetPageUptodate(filepage);
 968                        set_page_dirty(filepage);
 969                        swap_free(swap);
 970                } else if (!(error = move_from_swap_cache(
 971                                swappage, idx, mapping))) {
                            /*
                             * No filepage: swappage itself was moved into
                             * the file's mapping and becomes filepage.
                             */
 972                        info->flags |= SHMEM_PAGEIN;
 973                        shmem_swp_set(info, entry, 0);
 974                        shmem_swp_unmap(entry);
 975                        spin_unlock(&info->lock);
 976                        filepage = swappage;
 977                        swap_free(swap);
 978                } else {
                            /* The move failed: back out and try again */
 979                        shmem_swp_unmap(entry);
 980                        spin_unlock(&info->lock);
 981                        unlock_page(swappage);
 982                        page_cache_release(swappage);
 983                        if (error == -ENOMEM) {
 984                                /* let kswapd refresh zone for GFP_ATOMICs */
 985                                blk_congestion_wait(WRITE, HZ/50);
 986                        }
 987                        goto repeat;
 988                }
 989        } else if (sgp == SGP_READ && !filepage) {
                    /*
                     * No swap entry and only reading: look in the page
                     * cache once more, but do not allocate.  If nothing
                     * is there, filepage stays NULL and the done path
                     * hands back ZERO_PAGE(0).
                     */
 990                shmem_swp_unmap(entry);
 991                filepage = find_get_page(mapping, idx);
 992                if (filepage &&
 993                    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
 994                        spin_unlock(&info->lock);
 995                        wait_on_page_locked(filepage);
 996                        page_cache_release(filepage);
 997                        filepage = NULL;
 998                        goto repeat;
 999                }
1000                spin_unlock(&info->lock);
1001        } else {
                    /*
                     * A new page is needed: first take one block from the
                     * superblock's free_blocks and account it to the inode.
                     */
1002                shmem_swp_unmap(entry);
1003                sbinfo = SHMEM_SB(inode->i_sb);
1004                spin_lock(&sbinfo->stat_lock);
1005                if (sbinfo->free_blocks == 0 || shmem_acct_block(info->flags)) {
1006                        spin_unlock(&sbinfo->stat_lock);
1007                        spin_unlock(&info->lock);
1008                        error = -ENOSPC;
1009                        goto failed;
1010                }
1011                sbinfo->free_blocks--;
1012                inode->i_blocks += BLOCKS_PER_PAGE;
1013                spin_unlock(&sbinfo->stat_lock);
1014
1015                if (!filepage) {
                            /* info->lock is dropped across the allocation */
1016                        spin_unlock(&info->lock);
1017                        filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
1018                                                    info,
1019                                                    idx);
1020                        if (!filepage) {
1021                                shmem_unacct_blocks(info->flags, 1);
1022                                shmem_free_block(inode);
1023                                error = -ENOMEM;
1024                                goto failed;
1025                        }
1026
                            /*
                             * Re-read the swap entry now that the lock is
                             * held again.  If the lookup failed, a swap
                             * entry has appeared, or the page cache insert
                             * fails, undo the accounting and drop the page;
                             * without an error this retries from the top.
                             */
1027                        spin_lock(&info->lock);
1028                        entry = shmem_swp_alloc(info, idx, sgp);
1029                        if (IS_ERR(entry))
1030                                error = PTR_ERR(entry);
1031                        else {
1032                                swap = *entry;
1033                                shmem_swp_unmap(entry);
1034                        }
1035                        if (error || swap.val || 0 != add_to_page_cache_lru(
1036                                        filepage, mapping, idx, GFP_ATOMIC)) {
1037                                spin_unlock(&info->lock);
1038                                page_cache_release(filepage);
1039                                shmem_unacct_blocks(info->flags, 1);
1040                                shmem_free_block(inode);
1041                                filepage = NULL;
1042                                if (error)
1043                                        goto failed;
1044                                goto repeat;
1045                        }
1046                        info->flags |= SHMEM_PAGEIN;
1047                }
1048
                    /* The page starts out zero-filled and uptodate */
1049                info->alloced++;
1050                spin_unlock(&info->lock);
1051                clear_highpage(filepage);
1052                flush_dcache_page(filepage);
1053                SetPageUptodate(filepage);
1054        }
1055done:
            /*
             * Only when the caller passed no page is *pagep written: the
             * page is unlocked before being handed back.
             */
1056        if (!*pagep) {
1057                if (filepage) {
1058                        unlock_page(filepage);
1059                        *pagep = filepage;
1060                } else
1061                        *pagep = ZERO_PAGE(0);
1062        }
1063        if (type)
1064                *type = majmin;
1065        return 0;
1066
1067failed:
            /*
             * Unlock and release a page that differs from the one the
             * caller supplied; a caller-supplied page is left alone.
             */
1068        if (*pagep != filepage) {
1069                unlock_page(filepage);
1070                page_cache_release(filepage);
1071        }
1072        return error;
1073}
1074
1075struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
1076{
1077        struct inode *inode = vma->vm_file->f_dentry->d_inode;
1078        struct page *page = NULL;
1079        unsigned long idx;
1080        int error;
1081
1082        idx = (address - vma->vm_start) >> PAGE_SHIFT;
1083        idx += vma->vm_pgoff;
1084        idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
1085
1086        error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
1087        if (error)
1088                return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
1089
1090        mark_page_accessed(page);
1091        return page;
1092}
1093
1094static int shmem_populate(struct vm_area_struct *vma,
1095        unsigned long addr, unsigned long len,
1096        pgprot_t prot, unsigned long pgoff, int nonblock)
1097{
1098        struct inode *inode = vma->vm_file->f_dentry->d_inode;
1099        struct mm_struct *mm = vma->vm_mm;
1100        enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
1101        unsigned long size;
1102
1103        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1104        if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
1105                return -EINVAL;
1106
1107        while ((long) len > 0) {
1108                struct page *page = NULL;
1109                int err;
1110                /*
1111                 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
1112                 */
1113                err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
1114                if (err)
1115                        return err;
1116                if (page) {
1117                        mark_page_accessed(page);
1118                        err = install_page(mm, vma, addr, page, prot);
1119                        if (err) {
1120                                page_cache_release(page);
1121                                return err;
1122                        }
1123                } else if (nonblock) {
1124                        err = install_file_pte(mm, vma, addr, pgoff, prot);
1125                        if (err)
1126                                return err;
1127                }
1128
1129                len -= PAGE_SIZE;
1130                addr += PAGE_SIZE;
1131                pgoff++;
1132        }
1133        return 0;
1134}
1135
1136#ifdef CONFIG_NUMA
1137int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1138{
1139        struct inode *i = vma->vm_file->f_dentry->d_inode;
1140        return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
1141}
1142
1143struct mempolicy *
1144shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
1145{
1146        struct inode *i = vma->vm_file->f_dentry->d_inode;
1147        unsigned long idx;
1148
1149        idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1150        return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
1151}
1152#endif
1153
1154void shmem_lock(struct file *file, int lock)
1155{
1156        struct inode *inode = file->f_dentry->d_inode;
1157        struct shmem_inode_info *info = SHMEM_I(inode);
1158
1159        spin_lock(&info->lock);
1160        if (lock)
1161                info->flags |= VM_LOCKED;
1162        else
1163                info->flags &= ~VM_LOCKED;
1164        spin_unlock(&info->lock);
1165}
1166
1167static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1168{
1169        file_accessed(file);
1170        vma->vm_ops = &shmem_vm_ops;
1171        return 0;
1172}
1173
1174static struct inode *
1175shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1176{
1177        struct inode *inode;
1178        struct shmem_inode_info *info;
1179        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1180
1181        spin_lock(&sbinfo->stat_lock);
1182        if (!sbinfo->free_inodes) {
1183                spin_unlock(&sbinfo->stat_lock);
1184                return NULL;
1185        }
1186        sbinfo->free_inodes--;
1187        spin_unlock(&sbinfo->stat_lock);
1188
1189        inode = new_inode(sb);
1190        if (inode) {
1191                inode->i_mode = mode;
1192                inode->i_uid = current->fsuid;
1193                inode->i_gid = current->fsgid;
1194                inode->i_blksize = PAGE_CACHE_SIZE;
1195                inode->i_blocks = 0;
1196                inode->i_mapping->a_ops = &shmem_aops;
1197                inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1198                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1199                info = SHMEM_I(inode);
1200                memset(info, 0, (char *)inode - (char *)info);
1201                spin_lock_init(&info->lock);
1202                mpol_shared_policy_init(&info->policy);
1203                switch (mode & S_IFMT) {
1204                default:
1205                        init_special_inode(inode, mode, dev);
1206                        break;
1207                case S_IFREG:
1208                        inode->i_op = &shmem_inode_operations;
1209                        inode->i_fop = &shmem_file_operations;
1210                        spin_lock(&shmem_ilock);
1211                        list_add_tail(&info->list, &shmem_inodes);
1212                        spin_unlock(&shmem_ilock);
1213                        break;
1214                case S_IFDIR:
1215                        inode->i_nlink++;
1216                        /* Some things misbehave if size == 0 on a directory */
1217                        inode->i_size = 2 * BOGO_DIRENT_SIZE;
1218                        inode->i_op = &shmem_dir_inode_operations;
1219                        inode->i_fop = &simple_dir_operations;
1220                        break;
1221                case S_IFLNK:
1222                        break;
1223                }
1224        }
1225        return inode;
1226}
1227
1228static int shmem_set_size(struct shmem_sb_info *info,
1229                          unsigned long max_blocks, unsigned long max_inodes)
1230{
1231        int error;
1232        unsigned long blocks, inodes;
1233
1234        spin_lock(&info->stat_lock);
1235        blocks = info->max_blocks - info->free_blocks;
1236        inodes = info->max_inodes - info->free_inodes;
1237        error = -EINVAL;
1238        if (max_blocks < blocks)
1239                goto out;
1240        if (max_inodes < inodes)
1241                goto out;
1242        error = 0;
1243        info->max_blocks  = max_blocks;
1244        info->free_blocks = max_blocks - blocks;
1245        info->max_inodes  = max_inodes;
1246        info->free_inodes = max_inodes - inodes;
1247out:
1248        spin_unlock(&info->stat_lock);
1249        return error;
1250}
1251
1252#ifdef CONFIG_TMPFS
1253
1254static struct inode_operations shmem_symlink_inode_operations;
1255static struct inode_operations shmem_symlink_inline_operations;
1256
1257/*
1258 * Normally tmpfs makes no use of shmem_prepare_write, but it
1259 * lets a tmpfs file be used read-write below the loop driver.
1260 */
1261static int
1262shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1263{
1264        struct inode *inode = page->mapping->host;
1265        return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
1266}
1267
1268static ssize_t
1269shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
1270{
1271        struct inode    *inode = file->f_dentry->d_inode;
1272        loff_t          pos;
1273        unsigned long   written;
1274        int             err;
1275
1276        if ((ssize_t) count < 0)
1277                return -EINVAL;
1278
1279        if (!access_ok(VERIFY_READ, buf, count))
1280                return -EFAULT;
1281
1282        down(&inode->i_sem);
1283
1284        pos = *ppos;
1285        written = 0;
1286
1287        err = generic_write_checks(file, &pos, &count, 0);
1288        if (err || !count)
1289                goto out;
1290
1291        err = remove_suid(file->f_dentry);
1292        if (err)
1293                goto out;
1294
1295        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1296
1297        do {
1298                struct page *page = NULL;
1299                unsigned long bytes, index, offset;
1300                char *kaddr;
1301                int left;
1302
1303                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1304                index = pos >> PAGE_CACHE_SHIFT;
1305                bytes = PAGE_CACHE_SIZE - offset;
1306                if (bytes > count)
1307                        bytes = count;
1308
1309                /*
1310                 * We don't hold page lock across copy from user -
1311                 * what would it guard against? - so no deadlock here.
1312                 * But it still may be a good idea to prefault below.
1313                 */
1314
1315                err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
1316                if (err)
1317                        break;
1318
1319                left = bytes;
1320                if (PageHighMem(page)) {
1321                        volatile unsigned char dummy;
1322                        __get_user(dummy, buf);
1323                        __get_user(dummy, buf + bytes - 1);
1324
1325                        kaddr = kmap_atomic(page, KM_USER0);
1326                        left = __copy_from_user(kaddr + offset, buf, bytes);
1327                        kunmap_atomic(kaddr, KM_USER0);
1328                }
1329                if (left) {
1330                        kaddr = kmap(page);
1331                        left = __copy_from_user(kaddr + offset, buf, bytes);
1332                        kunmap(page);
1333                }
1334
1335                written += bytes;
1336                count -= bytes;
1337                pos += bytes;
1338                buf += bytes;
1339                if (pos > inode->i_size)
1340                        i_size_write(inode, pos);
1341
1342                flush_dcache_page(page);
1343                set_page_dirty(page);
1344                mark_page_accessed(page);
1345                page_cache_release(page);
1346
1347                if (left) {
1348                        pos -= left;
1349                        written -= left;
1350                        err = -EFAULT;
1351                        break;
1352                }
1353
1354                /*
1355                 * Our dirty pages are not counted in nr_dirty,
1356                 * and we do not attempt to balance dirty pages.
1357                 */
1358
1359                cond_resched();
1360        } while (count);
1361
1362        *ppos = pos;
1363        if (written)
1364                err = written;
1365out:
1366        up(&inode->i_sem);
1367        return err;
1368}
1369
1370static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1371{
1372        struct inode *inode = filp->f_dentry->d_inode;
1373        struct address_space *mapping = inode->i_mapping;
1374        unsigned long index, offset;
1375
1376        index = *ppos >> PAGE_CACHE_SHIFT;
1377        offset = *ppos & ~PAGE_CACHE_MASK;
1378
1379        for (;;) {
1380                struct page *page = NULL;
1381                unsigned long end_index, nr, ret;
1382                loff_t i_size = i_size_read(inode);
1383
1384                end_index = i_size >> PAGE_CACHE_SHIFT;
1385                if (index > end_index)
1386                        break;
1387                if (index == end_index) {
1388                        nr = i_size & ~PAGE_CACHE_MASK;
1389                        if (nr <= offset)
1390                                break;
1391                }
1392
1393                desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
1394                if (desc->error) {
1395                        if (desc->error == -EINVAL)
1396                                desc->error = 0;
1397                        break;
1398                }
1399
1400                /*
1401                 * We must evaluate after, since reads (unlike writes)
1402                 * are called without i_sem protection against truncate
1403                 */
1404                nr = PAGE_CACHE_SIZE;
1405                i_size = i_size_read(inode);
1406                end_index = i_size >> PAGE_CACHE_SHIFT;
1407                if (index == end_index) {
1408                        nr = i_size & ~PAGE_CACHE_MASK;
1409                        if (nr <= offset) {
1410                                page_cache_release(page);
1411                                break;
1412                        }
1413                }
1414                nr -= offset;
1415
1416                if (page != ZERO_PAGE(0)) {
1417                        /*
1418                         * If users can be writing to this page using arbitrary
1419                         * virtual addresses, take care about potential aliasing
1420                         * before reading the page on the kernel side.
1421                         */
1422                        if (mapping_writably_mapped(mapping))
1423                                flush_dcache_page(page);
1424                        /*
1425                         * Mark the page accessed if we read the beginning.
1426                         */
1427                        if (!offset)
1428                                mark_page_accessed(page);
1429                }
1430
1431                /*
1432                 * Ok, we have the page, and it's up-to-date, so
1433                 * now we can copy it to user space...
1434                 *
1435                 * The actor routine returns how many bytes were actually used..
1436                 * NOTE! This may not be the same as how much of a user buffer
1437                 * we filled up (we may be padding etc), so we can only update
1438                 * "pos" here (the actor routine has to update the user buffer
1439                 * pointers and the remaining count).
1440                 */
1441                ret = actor(desc, page, offset, nr);
1442                offset += ret;
1443                index += offset >> PAGE_CACHE_SHIFT;
1444                offset &= ~PAGE_CACHE_MASK;
1445
1446                page_cache_release(page);
1447                if (ret != nr || !desc->count)
1448                        break;
1449
1450                cond_resched();
1451        }
1452
1453        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1454        file_accessed(filp);
1455}
1456
1457static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1458{
1459        read_descriptor_t desc;
1460
1461        if ((ssize_t) count < 0)
1462                return -EINVAL;
1463        if (!access_ok(VERIFY_WRITE, buf, count))
1464                return -EFAULT;
1465        if (!count)
1466                return 0;
1467
1468        desc.written = 0;
1469        desc.count = count;
1470        desc.arg.buf = buf;
1471        desc.error = 0;
1472
1473        do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1474        if (desc.written)
1475                return desc.written;
1476        return desc.error;
1477}
1478
1479static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1480                         size_t count, read_actor_t actor, void *target)
1481{
1482        read_descriptor_t desc;
1483
1484        if (!count)
1485                return 0;
1486
1487        desc.written = 0;
1488        desc.count = count;
1489        desc.arg.data = target;
1490        desc.error = 0;
1491
1492        do_shmem_file_read(in_file, ppos, &desc, actor);
1493        if (desc.written)
1494                return desc.written;
1495        return desc.error;
1496}
1497
1498static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
1499{
1500        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1501
1502        buf->f_type = TMPFS_MAGIC;
1503        buf->f_bsize = PAGE_CACHE_SIZE;
1504        spin_lock(&sbinfo->stat_lock);
1505        buf->f_blocks = sbinfo->max_blocks;
1506        buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1507        buf->f_files = sbinfo->max_inodes;
1508        buf->f_ffree = sbinfo->free_inodes;
1509        spin_unlock(&sbinfo->stat_lock);
1510        buf->f_namelen = NAME_MAX;
1511        return 0;
1512}
1513
1514/*
1515 * File creation. Allocate an inode, and we're done..
1516 */
1517static int
1518shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1519{
1520        struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1521        int error = -ENOSPC;
1522
1523        if (inode) {
1524                if (dir->i_mode & S_ISGID) {
1525                        inode->i_gid = dir->i_gid;
1526                        if (S_ISDIR(mode))
1527                                inode->i_mode |= S_ISGID;
1528                }
1529                dir->i_size += BOGO_DIRENT_SIZE;
1530                dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1531                d_instantiate(dentry, inode);
1532                dget(dentry); /* Extra count - pin the dentry in core */
1533                error = 0;
1534        }
1535        return error;
1536}
1537
1538static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1539{
1540        int error;
1541
1542        if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1543                return error;
1544        dir->i_nlink++;
1545        return 0;
1546}
1547
1548static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1549                struct nameidata *nd)
1550{
1551        return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1552}
1553
1554/*
1555 * Link a file..
1556 */
1557static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1558{
1559        struct inode *inode = old_dentry->d_inode;
1560
1561        dir->i_size += BOGO_DIRENT_SIZE;
1562        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1563        inode->i_nlink++;
1564        atomic_inc(&inode->i_count);    /* New dentry reference */
1565        dget(dentry);           /* Extra pinning count for the created dentry */
1566        d_instantiate(dentry, inode);
1567        return 0;
1568}
1569
1570static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1571{
1572        struct inode *inode = dentry->d_inode;
1573
1574        dir->i_size -= BOGO_DIRENT_SIZE;
1575        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1576        inode->i_nlink--;
1577        dput(dentry);   /* Undo the count from "create" - this does all the work */
1578        return 0;
1579}
1580
1581static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1582{
1583        if (!simple_empty(dentry))
1584                return -ENOTEMPTY;
1585
1586        dir->i_nlink--;
1587        return shmem_unlink(dir, dentry);
1588}
1589
1590/*
1591 * The VFS layer already does all the dentry stuff for rename,
1592 * we just have to decrement the usage count for the target if
1593 * it exists so that the VFS layer correctly free's it when it
1594 * gets overwritten.
1595 */
1596static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1597{
1598        struct inode *inode = old_dentry->d_inode;
1599        int they_are_dirs = S_ISDIR(inode->i_mode);
1600
1601        if (!simple_empty(new_dentry))
1602                return -ENOTEMPTY;
1603
1604        if (new_dentry->d_inode) {
1605                (void) shmem_unlink(new_dir, new_dentry);
1606                if (they_are_dirs)
1607                        old_dir->i_nlink--;
1608        } else if (they_are_dirs) {
1609                old_dir->i_nlink--;
1610                new_dir->i_nlink++;
1611        }
1612
1613        old_dir->i_size -= BOGO_DIRENT_SIZE;
1614        new_dir->i_size += BOGO_DIRENT_SIZE;
1615        old_dir->i_ctime = old_dir->i_mtime =
1616        new_dir->i_ctime = new_dir->i_mtime =
1617        inode->i_ctime = CURRENT_TIME;
1618        return 0;
1619}
1620
1621static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1622{
1623        int error;
1624        int len;
1625        struct inode *inode;
1626        struct page *page = NULL;
1627        char *kaddr;
1628        struct shmem_inode_info *info;
1629
1630        len = strlen(symname) + 1;
1631        if (len > PAGE_CACHE_SIZE)
1632                return -ENAMETOOLONG;
1633
1634        inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1635        if (!inode)
1636                return -ENOSPC;
1637
1638        info = SHMEM_I(inode);
1639        inode->i_size = len-1;
1640        if (len <= (char *)inode - (char *)info) {
1641                /* do it inline */
1642                memcpy(info, symname, len);
1643                inode->i_op = &shmem_symlink_inline_operations;
1644        } else {
1645                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
1646                if (error) {
1647                        iput(inode);
1648                        return error;
1649                }
1650                inode->i_op = &shmem_symlink_inode_operations;
1651                spin_lock(&shmem_ilock);
1652                list_add_tail(&info->list, &shmem_inodes);
1653                spin_unlock(&shmem_ilock);
1654                kaddr = kmap_atomic(page, KM_USER0);
1655                memcpy(kaddr, symname, len);
1656                kunmap_atomic(kaddr, KM_USER0);
1657                set_page_dirty(page);
1658                page_cache_release(page);
1659        }
1660        if (dir->i_mode & S_ISGID)
1661                inode->i_gid = dir->i_gid;
1662        dir->i_size += BOGO_DIRENT_SIZE;
1663        dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1664        d_instantiate(dentry, inode);
1665        dget(dentry);
1666        return 0;
1667}
1668
1669static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1670{
1671        nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
1672        return 0;
1673}
1674
1675static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1676{
1677        struct page *page = NULL;
1678        int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1679        nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
1680        return 0;
1681}
1682
1683static void shmem_put_link(struct dentry *dentry, struct nameidata *nd)
1684{
1685        if (!IS_ERR(nd_get_link(nd))) {
1686                struct page *page;
1687
1688                page = find_get_page(dentry->d_inode->i_mapping, 0);
1689                if (!page)
1690                        BUG();
1691                kunmap(page);
1692                mark_page_accessed(page);
1693                page_cache_release(page);
1694                page_cache_release(page);
1695        }
1696}
1697
1698static struct inode_operations shmem_symlink_inline_operations = {
1699        .readlink       = generic_readlink,
1700        .follow_link    = shmem_follow_link_inline,
1701};
1702
1703static struct inode_operations shmem_symlink_inode_operations = {
1704        .truncate       = shmem_truncate,
1705        .readlink       = generic_readlink,
1706        .follow_link    = shmem_follow_link,
1707        .put_link       = shmem_put_link,
1708};
1709
1710static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1711{
1712        char *this_char, *value, *rest;
1713
1714        while ((this_char = strsep(&options, ",")) != NULL) {
1715                if (!*this_char)
1716                        continue;
1717                if ((value = strchr(this_char,'=')) != NULL) {
1718                        *value++ = 0;
1719                } else {
1720                        printk(KERN_ERR
1721                            "tmpfs: No value for mount option '%s'\n",
1722                            this_char);
1723                        return 1;
1724                }
1725
1726                if (!strcmp(this_char,"size")) {
1727                        unsigned long long size;
1728                        size = memparse(value,&rest);
1729                        if (*rest == '%') {
1730                                size <<= PAGE_SHIFT;
1731                                size *= totalram_pages;
1732                                do_div(size, 100);
1733                                rest++;
1734                        }
1735                        if (*rest)
1736                                goto bad_val;
1737                        *blocks = size >> PAGE_CACHE_SHIFT;
1738                } else if (!strcmp(this_char,"nr_blocks")) {
1739                        *blocks = memparse(value,&rest);
1740                        if (*rest)
1741                                goto bad_val;
1742                } else if (!strcmp(this_char,"nr_inodes")) {
1743                        *inodes = memparse(value,&rest);
1744                        if (*rest)
1745                                goto bad_val;
1746                } else if (!strcmp(this_char,"mode")) {
1747                        if (!mode)
1748                                continue;
1749                        *mode = simple_strtoul(value,&rest,8);
1750                        if (*rest)
1751                                goto bad_val;
1752                } else if (!strcmp(this_char,"uid")) {
1753                        if (!uid)
1754                                continue;
1755                        *uid = simple_strtoul(value,&rest,0);
1756                        if (*rest)
1757                                goto bad_val;
1758                } else if (!strcmp(this_char,"gid")) {
1759                        if (!gid)
1760                                continue;
1761                        *gid = simple_strtoul(value,&rest,0);
1762                        if (*rest)
1763                                goto bad_val;
1764                } else {
1765                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1766                               this_char);
1767                        return 1;
1768                }
1769        }
1770        return 0;
1771
1772bad_val:
1773        printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1774               value, this_char);
1775        return 1;
1776
1777}
1778
1779static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1780{
1781        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1782        unsigned long max_blocks = sbinfo->max_blocks;
1783        unsigned long max_inodes = sbinfo->max_inodes;
1784
1785        if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1786                return -EINVAL;
1787        return shmem_set_size(sbinfo, max_blocks, max_inodes);
1788}
1789#endif
1790
1791static int shmem_fill_super(struct super_block *sb,
1792                            void *data, int silent)
1793{
1794        struct inode *inode;
1795        struct dentry *root;
1796        unsigned long blocks, inodes;
1797        int mode   = S_IRWXUGO | S_ISVTX;
1798        uid_t uid = current->fsuid;
1799        gid_t gid = current->fsgid;
1800        struct shmem_sb_info *sbinfo;
1801        int err = -ENOMEM;
1802
1803        sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1804        if (!sbinfo)
1805                return -ENOMEM;
1806        sb->s_fs_info = sbinfo;
1807        memset(sbinfo, 0, sizeof(struct shmem_sb_info));
1808
1809        /*
1810         * Per default we only allow half of the physical ram per
1811         * tmpfs instance
1812         */
1813        blocks = inodes = totalram_pages / 2;
1814
1815#ifdef CONFIG_TMPFS
1816        if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
1817                err = -EINVAL;
1818                goto failed;
1819        }
1820#else
1821        sb->s_flags |= MS_NOUSER;
1822#endif
1823
1824        spin_lock_init(&sbinfo->stat_lock);
1825        sbinfo->max_blocks = blocks;
1826        sbinfo->free_blocks = blocks;
1827        sbinfo->max_inodes = inodes;
1828        sbinfo->free_inodes = inodes;
1829        sb->s_maxbytes = SHMEM_MAX_BYTES;
1830        sb->s_blocksize = PAGE_CACHE_SIZE;
1831        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1832        sb->s_magic = TMPFS_MAGIC;
1833        sb->s_op = &shmem_ops;
1834        inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1835        if (!inode)
1836                goto failed;
1837        inode->i_uid = uid;
1838        inode->i_gid = gid;
1839        root = d_alloc_root(inode);
1840        if (!root)
1841                goto failed_iput;
1842        sb->s_root = root;
1843        return 0;
1844
1845failed_iput:
1846        iput(inode);
1847failed:
1848        kfree(sbinfo);
1849        sb->s_fs_info = NULL;
1850        return err;
1851}
1852
1853static void shmem_put_super(struct super_block *sb)
1854{
1855        kfree(sb->s_fs_info);
1856        sb->s_fs_info = NULL;
1857}
1858
1859static kmem_cache_t *shmem_inode_cachep;
1860
1861static struct inode *shmem_alloc_inode(struct super_block *sb)
1862{
1863        struct shmem_inode_info *p;
1864        p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1865        if (!p)
1866                return NULL;
1867        return &p->vfs_inode;
1868}
1869
1870static void shmem_destroy_inode(struct inode *inode)
1871{
1872        mpol_free_shared_policy(&SHMEM_I(inode)->policy);
1873        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1874}
1875
1876static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1877{
1878        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1879
1880        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1881            SLAB_CTOR_CONSTRUCTOR) {
1882                inode_init_once(&p->vfs_inode);
1883        }
1884}
1885
1886static int init_inodecache(void)
1887{
1888        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
1889                                sizeof(struct shmem_inode_info),
1890                                0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
1891                                init_once, NULL);
1892        if (shmem_inode_cachep == NULL)
1893                return -ENOMEM;
1894        return 0;
1895}
1896
1897static void destroy_inodecache(void)
1898{
1899        if (kmem_cache_destroy(shmem_inode_cachep))
1900                printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
1901}
1902
1903static struct address_space_operations shmem_aops = {
1904        .writepage      = shmem_writepage,
1905        .set_page_dirty = __set_page_dirty_nobuffers,
1906#ifdef CONFIG_TMPFS
1907        .prepare_write  = shmem_prepare_write,
1908        .commit_write   = simple_commit_write,
1909#endif
1910};
1911
1912static struct file_operations shmem_file_operations = {
1913        .mmap           = shmem_mmap,
1914#ifdef CONFIG_TMPFS
1915        .llseek         = generic_file_llseek,
1916        .read           = shmem_file_read,
1917        .write          = shmem_file_write,
1918        .fsync          = simple_sync_file,
1919        .sendfile       = shmem_file_sendfile,
1920#endif
1921};
1922
1923static struct inode_operations shmem_inode_operations = {
1924        .truncate       = shmem_truncate,
1925        .setattr        = shmem_notify_change,
1926};
1927
1928static struct inode_operations shmem_dir_inode_operations = {
1929#ifdef CONFIG_TMPFS
1930        .create         = shmem_create,
1931        .lookup         = simple_lookup,
1932        .link           = shmem_link,
1933        .unlink         = shmem_unlink,
1934        .symlink        = shmem_symlink,
1935        .mkdir          = shmem_mkdir,
1936        .rmdir          = shmem_rmdir,
1937        .mknod          = shmem_mknod,
1938        .rename         = shmem_rename,
1939#endif
1940};
1941
1942static struct super_operations shmem_ops = {
1943        .alloc_inode    = shmem_alloc_inode,
1944        .destroy_inode  = shmem_destroy_inode,
1945#ifdef CONFIG_TMPFS
1946        .statfs         = shmem_statfs,
1947        .remount_fs     = shmem_remount_fs,
1948#endif
1949        .delete_inode   = shmem_delete_inode,
1950        .drop_inode     = generic_delete_inode,
1951        .put_super      = shmem_put_super,
1952};
1953
1954static struct vm_operations_struct shmem_vm_ops = {
1955        .nopage         = shmem_nopage,
1956        .populate       = shmem_populate,
1957#ifdef CONFIG_NUMA
1958        .set_policy     = shmem_set_policy,
1959        .get_policy     = shmem_get_policy,
1960#endif
1961};
1962
1963static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
1964        int flags, const char *dev_name, void *data)
1965{
1966        return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
1967}
1968
1969static struct file_system_type tmpfs_fs_type = {
1970        .owner          = THIS_MODULE,
1971        .name           = "tmpfs",
1972        .get_sb         = shmem_get_sb,
1973        .kill_sb        = kill_litter_super,
1974};
1975static struct vfsmount *shm_mnt;
1976
1977static int __init init_tmpfs(void)
1978{
1979        int error;
1980
1981        error = init_inodecache();
1982        if (error)
1983                goto out3;
1984
1985        error = register_filesystem(&tmpfs_fs_type);
1986        if (error) {
1987                printk(KERN_ERR "Could not register tmpfs\n");
1988                goto out2;
1989        }
1990#ifdef CONFIG_TMPFS
1991        devfs_mk_dir("shm");
1992#endif
1993        shm_mnt = kern_mount(&tmpfs_fs_type);
1994        if (IS_ERR(shm_mnt)) {
1995                error = PTR_ERR(shm_mnt);
1996                printk(KERN_ERR "Could not kern_mount tmpfs\n");
1997                goto out1;
1998        }
1999
2000        /* The internal instance should not do size checking */
2001        shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
2002        return 0;
2003
2004out1:
2005        unregister_filesystem(&tmpfs_fs_type);
2006out2:
2007        destroy_inodecache();
2008out3:
2009        shm_mnt = ERR_PTR(error);
2010        return error;
2011}
2012module_init(init_tmpfs)
2013
2014/*
2015 * shmem_file_setup - get an unlinked file living in tmpfs
2016 *
2017 * @name: name for dentry (to be seen in /proc/<pid>/maps
2018 * @size: size to be set for the file
2019 *
2020 */
2021struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2022{
2023        int error;
2024        struct file *file;
2025        struct inode *inode;
2026        struct dentry *dentry, *root;
2027        struct qstr this;
2028
2029        if (IS_ERR(shm_mnt))
2030                return (void *)shm_mnt;
2031
2032        if (size > SHMEM_MAX_BYTES)
2033                return ERR_PTR(-EINVAL);
2034
2035        if (shmem_acct_size(flags, size))
2036                return ERR_PTR(-ENOMEM);
2037
2038        error = -ENOMEM;
2039        this.name = name;
2040        this.len = strlen(name);
2041        this.hash = 0; /* will go */
2042        root = shm_mnt->mnt_root;
2043        dentry = d_alloc(root, &this);
2044        if (!dentry)
2045                goto put_memory;
2046
2047        error = -ENFILE;
2048        file = get_empty_filp();
2049        if (!file)
2050                goto put_dentry;
2051
2052        error = -ENOSPC;
2053        inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
2054        if (!inode)
2055                goto close_file;
2056
2057        SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
2058        d_instantiate(dentry, inode);
2059        inode->i_size = size;
2060        inode->i_nlink = 0;     /* It is unlinked */
2061        file->f_vfsmnt = mntget(shm_mnt);
2062        file->f_dentry = dentry;
2063        file->f_mapping = inode->i_mapping;
2064        file->f_op = &shmem_file_operations;
2065        file->f_mode = FMODE_WRITE | FMODE_READ;
2066        return(file);
2067
2068close_file:
2069        put_filp(file);
2070put_dentry:
2071        dput(dentry);
2072put_memory:
2073        shmem_unacct_size(flags, size);
2074        return ERR_PTR(error);
2075}
2076
2077/*
2078 * shmem_zero_setup - setup a shared anonymous mapping
2079 *
2080 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
2081 */
2082int shmem_zero_setup(struct vm_area_struct *vma)
2083{
2084        struct file *file;
2085        loff_t size = vma->vm_end - vma->vm_start;
2086
2087        file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2088        if (IS_ERR(file))
2089                return PTR_ERR(file);
2090
2091        if (vma->vm_file)
2092                fput(vma->vm_file);
2093        vma->vm_file = file;
2094        vma->vm_ops = &shmem_vm_ops;
2095        return 0;
2096}
2097
2098EXPORT_SYMBOL(shmem_file_setup);
2099
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.