linux-old/fs/inode.c
<<
>>
Prefs
   1/*
   2 * linux/fs/inode.c
   3 *
   4 * (C) 1997 Linus Torvalds
   5 */
   6
   7#include <linux/config.h>
   8#include <linux/fs.h>
   9#include <linux/string.h>
  10#include <linux/mm.h>
  11#include <linux/dcache.h>
  12#include <linux/init.h>
  13#include <linux/quotaops.h>
  14#include <linux/slab.h>
  15#include <linux/cache.h>
  16#include <linux/swap.h>
  17#include <linux/swapctl.h>
  18#include <linux/prefetch.h>
  19#include <linux/locks.h>
  20
  21/*
  22 * New inode.c implementation.
  23 *
  24 * This implementation has the basic premise of trying
  25 * to be extremely low-overhead and SMP-safe, yet be
  26 * simple enough to be "obviously correct".
  27 *
  28 * Famous last words.
  29 */
  30
  31/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
  32
  33/* #define INODE_PARANOIA 1 */
  34/* #define INODE_DEBUG 1 */
  35
  36/*
  37 * Inode lookup is no longer as critical as it used to be:
  38 * most of the lookups are going to be through the dcache.
  39 */
  40#define I_HASHBITS      i_hash_shift    /* right-shift hash() uses to fold higher bits down */
  41#define I_HASHMASK      i_hash_mask     /* mask hash() applies to obtain a bucket index */
  42
  43static unsigned int i_hash_mask;        /* backs I_HASHMASK; assigned outside the code visible here */
  44static unsigned int i_hash_shift;       /* backs I_HASHBITS; assigned outside the code visible here */
  45
  46/*
  47 * Each inode can be on two separate lists. One is
  48 * the hash list of the inode, used for lookups. The
  49 * other linked list is the "type" list:
  50 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
  51 *  "dirty"  - as "in_use" but also dirty
  52 *  "unused" - valid inode, i_count = 0
  53 *
  54 * A "dirty" list is maintained for each super block,
  55 * allowing for low-overhead inode sync() operations.
  56 */
  57
  58static LIST_HEAD(inode_in_use);         /* i_list home of referenced, clean inodes (see __iget, __sync_one) */
  59static LIST_HEAD(inode_unused);         /* i_list home of clean inodes with i_count == 0; scanned by prune_icache */
  60static struct list_head *inode_hashtable;        /* array of hash chains, indexed by hash(sb, ino) */
  61static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */
  62
  63/*
  64 * A simple spinlock to protect the list manipulations.
  65 *
  66 * NOTE! You also have to own the lock if you change
  67 * the i_state of an inode while it is in use..
  68 */
  69static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;      /* wherever this file holds both, it is taken before sb_lock */
  70
  71/*
  72 * Statistics gathering..
  73 */
  74struct inodes_stat_t inodes_stat;       /* the nr_inodes and nr_unused counters are maintained by the code below */
  75
  76static kmem_cache_t * inode_cachep;     /* slab cache behind alloc_inode() and destroy_inode() */
  77
  78#define alloc_inode() \
  79         ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
  80static void destroy_inode(struct inode *inode) 
  81{
  82        if (inode_has_buffers(inode))
  83                BUG();
  84        kmem_cache_free(inode_cachep, (inode));
  85}
  86
  87
  88/*
  89 * These are initializations that only need to be done
  90 * once, because the fields are idempotent across use
  91 * of the inode, so let the slab aware of that.
  92 */
  93static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
  94{
  95        struct inode * inode = (struct inode *) foo;
  96
  97        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
  98            SLAB_CTOR_CONSTRUCTOR)
  99        {
 100                memset(inode, 0, sizeof(*inode));
 101                init_waitqueue_head(&inode->i_wait);
 102                INIT_LIST_HEAD(&inode->i_hash);
 103                INIT_LIST_HEAD(&inode->i_data.clean_pages);
 104                INIT_LIST_HEAD(&inode->i_data.dirty_pages);
 105                INIT_LIST_HEAD(&inode->i_data.locked_pages);
 106                INIT_LIST_HEAD(&inode->i_dentry);
 107                INIT_LIST_HEAD(&inode->i_dirty_buffers);
 108                INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
 109                INIT_LIST_HEAD(&inode->i_devices);
 110                sema_init(&inode->i_sem, 1);
 111                sema_init(&inode->i_zombie, 1);
 112                spin_lock_init(&inode->i_data.i_shared_lock);
 113        }
 114}
 115
 116/*
 117 * Put the inode on the super block's dirty list.
 118 *
 119 * CAREFUL! We mark it dirty unconditionally, but
 120 * move it onto the dirty list only if it is hashed.
 121 * If it was not hashed, it will never be added to
 122 * the dirty list even if it is later hashed, as it
 123 * will have been marked dirty already.
 124 *
 125 * In short, make sure you hash any inodes _before_
 126 * you start marking them dirty..
 127 */
 128 
 129/**
 130 *      __mark_inode_dirty -    internal function
 131 *      @inode: inode to mark
 132 *      @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 133 *      Mark an inode as dirty. Callers should use mark_inode_dirty or
 134 *      mark_inode_dirty_sync.
 135 */
 136 
 137void __mark_inode_dirty(struct inode *inode, int flags)
 138{
 139        struct super_block * sb = inode->i_sb;
 140
 141        if (!sb)
 142                return;
 143
 144        /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
 145        if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 146                if (sb->s_op && sb->s_op->dirty_inode)
 147                        sb->s_op->dirty_inode(inode);
 148        }
 149
 150        /* avoid the locking if we can */
 151        if ((inode->i_state & flags) == flags)
 152                return;
 153
 154        spin_lock(&inode_lock);
 155        if ((inode->i_state & flags) != flags) {
 156                inode->i_state |= flags;
 157                /* Only add valid (ie hashed) inodes to the dirty list */
 158                if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) {
 159                        list_del(&inode->i_list);
 160                        list_add(&inode->i_list, &sb->s_dirty);
 161                }
 162        }
 163        spin_unlock(&inode_lock);
 164}
 165
 166static void __wait_on_inode(struct inode * inode)
 167{
 168        DECLARE_WAITQUEUE(wait, current);
 169
 170        add_wait_queue(&inode->i_wait, &wait);
 171repeat:
 172        set_current_state(TASK_UNINTERRUPTIBLE);
 173        if (inode->i_state & I_LOCK) {
 174                schedule();
 175                goto repeat;
 176        }
 177        remove_wait_queue(&inode->i_wait, &wait);
 178        current->state = TASK_RUNNING;
 179}
 180
 181static inline void wait_on_inode(struct inode *inode)
 182{
 183        if (inode->i_state & I_LOCK)
 184                __wait_on_inode(inode);
 185}
 186
 187
 188static inline void write_inode(struct inode *inode, int sync)
 189{
 190        if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
 191                inode->i_sb->s_op->write_inode(inode, sync);
 192}
 193
 194static inline void __iget(struct inode * inode)
 195{
 196        if (atomic_read(&inode->i_count)) {
 197                atomic_inc(&inode->i_count);
 198                return;
 199        }
 200        atomic_inc(&inode->i_count);
 201        if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 202                list_del(&inode->i_list);
 203                list_add(&inode->i_list, &inode_in_use);
 204        }
 205        inodes_stat.nr_unused--;
 206}
 207
 208static inline void __sync_one(struct inode *inode, int sync)
 209{
 210        unsigned dirty;
 211
 212        list_del(&inode->i_list);
 213        list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
 214
 215        if (inode->i_state & I_LOCK)
 216                BUG();
 217
 218        /* Set I_LOCK, reset I_DIRTY */
 219        dirty = inode->i_state & I_DIRTY;
 220        inode->i_state |= I_LOCK;
 221        inode->i_state &= ~I_DIRTY;
 222        spin_unlock(&inode_lock);
 223
 224        filemap_fdatasync(inode->i_mapping);
 225
 226        /* Don't write the inode if only I_DIRTY_PAGES was set */
 227        if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
 228                write_inode(inode, sync);
 229
 230        filemap_fdatawait(inode->i_mapping);
 231
 232        spin_lock(&inode_lock);
 233        inode->i_state &= ~I_LOCK;
 234        if (!(inode->i_state & I_FREEING)) {
 235                struct list_head *to;
 236                if (inode->i_state & I_DIRTY)
 237                        to = &inode->i_sb->s_dirty;
 238                else if (atomic_read(&inode->i_count))
 239                        to = &inode_in_use;
 240                else
 241                        to = &inode_unused;
 242                list_del(&inode->i_list);
 243                list_add(&inode->i_list, to);
 244        }
 245        wake_up(&inode->i_wait);
 246}
 247
 248static inline void sync_one(struct inode *inode, int sync)
 249{
 250        while (inode->i_state & I_LOCK) {
 251                __iget(inode);
 252                spin_unlock(&inode_lock);
 253                __wait_on_inode(inode);
 254                iput(inode);
 255                spin_lock(&inode_lock);
 256        }
 257
 258        __sync_one(inode, sync);
 259}
 260
 261static inline void sync_list(struct list_head *head)
 262{
 263        struct list_head * tmp;
 264
 265        while ((tmp = head->prev) != head) 
 266                __sync_one(list_entry(tmp, struct inode, i_list), 0);
 267}
 268
 269static inline void wait_on_locked(struct list_head *head)
 270{
 271        struct list_head * tmp;
 272        while ((tmp = head->prev) != head) {
 273                struct inode *inode = list_entry(tmp, struct inode, i_list);
 274                __iget(inode);
 275                spin_unlock(&inode_lock);
 276                __wait_on_inode(inode);
 277                iput(inode);
 278                spin_lock(&inode_lock);
 279        }
 280}
 281
 282static inline int try_to_sync_unused_list(struct list_head *head, int nr_inodes)
 283{
 284        struct list_head *tmp = head;
 285        struct inode *inode;
 286
 287        while (nr_inodes && (tmp = tmp->prev) != head) {
 288                inode = list_entry(tmp, struct inode, i_list);
 289
 290                if (!atomic_read(&inode->i_count)) {
 291                        __sync_one(inode, 0);
 292                        nr_inodes--;
 293
 294                        /* 
 295                         * __sync_one moved the inode to another list,
 296                         * so we have to start looking from the list head.
 297                         */
 298                        tmp = head;
 299                }
 300        }
 301
 302        return nr_inodes;
 303}
 304
 305void sync_inodes_sb(struct super_block *sb)
 306{
 307        spin_lock(&inode_lock);
 308        while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) {
 309                sync_list(&sb->s_dirty);
 310                wait_on_locked(&sb->s_locked_inodes);
 311        }
 312        spin_unlock(&inode_lock);
 313}
 314
 315/*
 316 * Note:
 317 * We don't need to grab a reference to superblock here. If it has non-empty
 318 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
 319 * past sync_inodes_sb() until both ->s_dirty and ->s_locked_inodes are
 320 * empty. Since __sync_one() regains inode_lock before it finally moves
 321 * inode from superblock lists we are OK.
 322 */
 323
 324void sync_unlocked_inodes(void)
 325{
 326        struct super_block * sb;
 327        spin_lock(&inode_lock);
 328        spin_lock(&sb_lock);
 329        sb = sb_entry(super_blocks.next);
 330        for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
 331                if (!list_empty(&sb->s_dirty)) {
 332                        spin_unlock(&sb_lock);
 333                        sync_list(&sb->s_dirty);
 334                        spin_lock(&sb_lock);
 335                }
 336        }
 337        spin_unlock(&sb_lock);
 338        spin_unlock(&inode_lock);
 339}
 340
 341/*
 342 * Find a superblock with inodes that need to be synced
 343 */
 344
 345static struct super_block *get_super_to_sync(void)
 346{
 347        struct list_head *p;
 348restart:
 349        spin_lock(&inode_lock);
 350        spin_lock(&sb_lock);
 351        list_for_each(p, &super_blocks) {
 352                struct super_block *s = list_entry(p,struct super_block,s_list);
 353                if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes))
 354                        continue;
 355                s->s_count++;
 356                spin_unlock(&sb_lock);
 357                spin_unlock(&inode_lock);
 358                down_read(&s->s_umount);
 359                if (!s->s_root) {
 360                        drop_super(s);
 361                        goto restart;
 362                }
 363                return s;
 364        }
 365        spin_unlock(&sb_lock);
 366        spin_unlock(&inode_lock);
 367        return NULL;
 368}
 369
 370/**
 371 *      sync_inodes
 372 *      @dev: device to sync the inodes from.
 373 *
 374 *      sync_inodes goes through the super block's dirty list, 
 375 *      writes them out, and puts them back on the normal list.
 376 */
 377
 378void sync_inodes(kdev_t dev)
 379{
 380        struct super_block * s;
 381
 382        /*
 383         * Search the super_blocks array for the device(s) to sync.
 384         */
 385        if (dev) {
 386                if ((s = get_super(dev)) != NULL) {
 387                        sync_inodes_sb(s);
 388                        drop_super(s);
 389                }
 390        } else {
 391                while ((s = get_super_to_sync()) != NULL) {
 392                        sync_inodes_sb(s);
 393                        drop_super(s);
 394                }
 395        }
 396}
 397
 398static void try_to_sync_unused_inodes(void * arg)
 399{
 400        struct super_block * sb;
 401        int nr_inodes = inodes_stat.nr_unused;
 402
 403        spin_lock(&inode_lock);
 404        spin_lock(&sb_lock);
 405        sb = sb_entry(super_blocks.next);
 406        for (; nr_inodes && sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
 407                if (list_empty(&sb->s_dirty))
 408                        continue;
 409                spin_unlock(&sb_lock);
 410                nr_inodes = try_to_sync_unused_list(&sb->s_dirty, nr_inodes);
 411                spin_lock(&sb_lock);
 412        }
 413        spin_unlock(&sb_lock);
 414        spin_unlock(&inode_lock);
 415}
 416
 417static struct tq_struct unused_inodes_flush_task;       /* queued by prune_icache() via schedule_task(); presumably set up elsewhere to run try_to_sync_unused_inodes() - TODO confirm */
 418
 419/**
 420 *      write_inode_now -       write an inode to disk
 421 *      @inode: inode to write to disk
 422 *      @sync: whether the write should be synchronous or not
 423 *
 424 *      This function commits an inode to disk immediately if it is
 425 *      dirty. This is primarily needed by knfsd.
 426 */
 427 
 428void write_inode_now(struct inode *inode, int sync)
 429{
 430        struct super_block * sb = inode->i_sb;
 431
 432        if (sb) {
 433                spin_lock(&inode_lock);
 434                while (inode->i_state & I_DIRTY)
 435                        sync_one(inode, sync);
 436                spin_unlock(&inode_lock);
 437                if (sync)
 438                        wait_on_inode(inode);
 439        }
 440        else
 441                printk(KERN_ERR "write_inode_now: no super block\n");
 442}
 443
 444/**
 445 * generic_osync_inode - flush all dirty data for a given inode to disk
 446 * @inode: inode to write
 447 * @datasync: if set, don't bother flushing timestamps
 448 *
 449 * This can be called by file_write functions for files which have the
 450 * O_SYNC flag set, to flush dirty writes to disk.  
 451 */
 452
 453int generic_osync_inode(struct inode *inode, int what)
 454{
 455        int err = 0, err2 = 0, need_write_inode_now = 0;
 456        
 457        /* 
 458         * WARNING
 459         *
 460         * Currently, the filesystem write path does not pass the
 461         * filp down to the low-level write functions.  Therefore it
 462         * is impossible for (say) __block_commit_write to know if
 463         * the operation is O_SYNC or not.
 464         *
 465         * Ideally, O_SYNC writes would have the filesystem call
 466         * ll_rw_block as it went to kick-start the writes, and we
 467         * could call osync_inode_buffers() here to wait only for
 468         * those IOs which have already been submitted to the device
 469         * driver layer.  As it stands, if we did this we'd not write
 470         * anything to disk since our writes have not been queued by
 471         * this point: they are still on the dirty LRU.
 472         * 
 473         * So, currently we will call fsync_inode_buffers() instead,
 474         * to flush _all_ dirty buffers for this inode to disk on 
 475         * every O_SYNC write, not just the synchronous I/Os.  --sct
 476         */
 477
 478        if (what & OSYNC_METADATA)
 479                err = fsync_inode_buffers(inode);
 480        if (what & OSYNC_DATA)
 481                err2 = fsync_inode_data_buffers(inode);
 482        if (!err)
 483                err = err2;
 484
 485        spin_lock(&inode_lock);
 486        if ((inode->i_state & I_DIRTY) &&
 487            ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
 488                need_write_inode_now = 1;
 489        spin_unlock(&inode_lock);
 490
 491        if (need_write_inode_now)
 492                write_inode_now(inode, 1);
 493        else
 494                wait_on_inode(inode);
 495
 496        return err;
 497}
 498
 499/**
 500 * clear_inode - clear an inode
 501 * @inode: inode to clear
 502 *
 503 * This is called by the filesystem to tell us
 504 * that the inode is no longer useful. We just
 505 * terminate it with extreme prejudice.
 506 */
 507 
 508void clear_inode(struct inode *inode)
 509{
 510        invalidate_inode_buffers(inode);
 511       
 512        if (inode->i_data.nrpages)
 513                BUG();
 514        if (!(inode->i_state & I_FREEING))
 515                BUG();
 516        if (inode->i_state & I_CLEAR)
 517                BUG();
 518        wait_on_inode(inode);
 519        DQUOT_DROP(inode);
 520        if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
 521                inode->i_sb->s_op->clear_inode(inode);
 522        if (inode->i_bdev)
 523                bd_forget(inode);
 524        else if (inode->i_cdev) {
 525                cdput(inode->i_cdev);
 526                inode->i_cdev = NULL;
 527        }
 528        inode->i_state = I_CLEAR;
 529}
 530
 531/*
 532 * Dispose-list gets a local list with local inodes in it, so it doesn't
 533 * need to worry about list corruption and SMP locks.
 534 */
 535static void dispose_list(struct list_head * head)
 536{
 537        struct list_head * inode_entry;
 538        struct inode * inode;
 539
 540        while ((inode_entry = head->next) != head)
 541        {
 542                list_del(inode_entry);
 543
 544                inode = list_entry(inode_entry, struct inode, i_list);
 545                if (inode->i_data.nrpages)
 546                        truncate_inode_pages(&inode->i_data, 0);
 547                clear_inode(inode);
 548                destroy_inode(inode);
 549                inodes_stat.nr_inodes--;
 550        }
 551}
 552
 553/*
 554 * Invalidate all inodes for a device.
 555 */
 556static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
 557{
 558        struct list_head *next;
 559        int busy = 0, count = 0;
 560
 561        next = head->next;
 562        for (;;) {
 563                struct list_head * tmp = next;
 564                struct inode * inode;
 565
 566                next = next->next;
 567                if (tmp == head)
 568                        break;
 569                inode = list_entry(tmp, struct inode, i_list);
 570                if (inode->i_sb != sb)
 571                        continue;
 572                invalidate_inode_buffers(inode);
 573                if (!atomic_read(&inode->i_count)) {
 574                        list_del_init(&inode->i_hash);
 575                        list_del(&inode->i_list);
 576                        list_add(&inode->i_list, dispose);
 577                        inode->i_state |= I_FREEING;
 578                        count++;
 579                        continue;
 580                }
 581                busy = 1;
 582        }
 583        /* only unused inodes may be cached with i_count zero */
 584        inodes_stat.nr_unused -= count;
 585        return busy;
 586}
 587
 588/*
 589 * This is a two-stage process. First we collect all
 590 * offending inodes onto the throw-away list, and in
 591 * the second stage we actually dispose of them. This
 592 * is because we don't want to sleep while messing
 593 * with the global lists..
 594 */
 595 
 596/**
 597 *      invalidate_inodes       - discard the inodes on a device
 598 *      @sb: superblock
 599 *
 600 *      Discard all of the inodes for a given superblock. If the discard
 601 *      fails because there are busy inodes then a non zero value is returned.
 602 *      If the discard is successful all the inodes have been discarded.
 603 */
 604 
 605int invalidate_inodes(struct super_block * sb)
 606{
 607        int busy;
 608        LIST_HEAD(throw_away);
 609
 610        spin_lock(&inode_lock);
 611        busy = invalidate_list(&inode_in_use, sb, &throw_away);
 612        busy |= invalidate_list(&inode_unused, sb, &throw_away);
 613        busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
 614        busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
 615        spin_unlock(&inode_lock);
 616
 617        dispose_list(&throw_away);
 618
 619        return busy;
 620}
 621 
 622int invalidate_device(kdev_t dev, int do_sync)
 623{
 624        struct super_block *sb;
 625        int res;
 626
 627        if (do_sync)
 628                fsync_dev(dev);
 629
 630        res = 0;
 631        sb = get_super(dev);
 632        if (sb) {
 633                /*
 634                 * no need to lock the super, get_super holds the
 635                 * read semaphore so the filesystem cannot go away
 636                 * under us (->put_super runs with the write lock
 637                 * hold).
 638                 */
 639                shrink_dcache_sb(sb);
 640                res = invalidate_inodes(sb);
 641                drop_super(sb);
 642        }
 643        invalidate_buffers(dev);
 644        return res;
 645}
 646
 647
 648/*
 649 * This is called with the inode lock held. It searches
 650 * the in-use for freeable inodes, which are moved to a
 651 * temporary list and then placed on the unused list by
 652 * dispose_list. 
 653 *
 654 * We don't expect to have to call this very often.
 655 *
 656 * N.B. The spinlock is released during the call to
 657 *      dispose_list.
 658 */
 659#define CAN_UNUSE(inode) \
 660        ((((inode)->i_state | (inode)->i_data.nrpages) == 0)  && \
 661         !inode_has_buffers(inode))
 662#define INODE(entry)    (list_entry(entry, struct inode, i_list))
 663
 664void prune_icache(int goal)
 665{
 666        LIST_HEAD(list);
 667        struct list_head *entry, *freeable = &list;
 668        int count;
 669        struct inode * inode;
 670
 671        spin_lock(&inode_lock);
 672
 673        count = 0;
 674        entry = inode_unused.prev;
 675        while (entry != &inode_unused)
 676        {
 677                struct list_head *tmp = entry;
 678
 679                entry = entry->prev;
 680                inode = INODE(tmp);
 681                if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
 682                        continue;
 683                if (!CAN_UNUSE(inode))
 684                        continue;
 685                if (atomic_read(&inode->i_count))
 686                        continue;
 687                list_del(tmp);
 688                list_del(&inode->i_hash);
 689                INIT_LIST_HEAD(&inode->i_hash);
 690                list_add(tmp, freeable);
 691                inode->i_state |= I_FREEING;
 692                count++;
 693                if (!--goal)
 694                        break;
 695        }
 696        inodes_stat.nr_unused -= count;
 697        spin_unlock(&inode_lock);
 698
 699        dispose_list(freeable);
 700
 701        /* 
 702         * If we didn't freed enough clean inodes schedule
 703         * a sync of the dirty inodes, we cannot do it
 704         * from here or we're either synchronously dogslow
 705         * or we deadlock with oom.
 706         */
 707        if (goal)
 708                schedule_task(&unused_inodes_flush_task);
 709}
 710
 711int shrink_icache_memory(int priority, int gfp_mask)
 712{
 713        int count = 0;
 714
 715        /*
 716         * Nasty deadlock avoidance..
 717         *
 718         * We may hold various FS locks, and we don't
 719         * want to recurse into the FS that called us
 720         * in clear_inode() and friends..
 721         */
 722        if (!(gfp_mask & __GFP_FS))
 723                return 0;
 724
 725        count = inodes_stat.nr_unused / priority;
 726
 727        prune_icache(count);
 728        return kmem_cache_shrink(inode_cachep);
 729}
 730
 731/*
 732 * Called with the inode lock held.
 733 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 734 * by hand after calling find_inode now! This simplifies iunique and won't
 735 * add any additional branch in the common code.
 736 */
 737static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
 738{
 739        struct list_head *tmp;
 740        struct inode * inode;
 741
 742        tmp = head;
 743        for (;;) {
 744                tmp = tmp->next;
 745                inode = NULL;
 746                if (tmp == head)
 747                        break;
 748                inode = list_entry(tmp, struct inode, i_hash);
 749                if (inode->i_ino != ino)
 750                        continue;
 751                if (inode->i_sb != sb)
 752                        continue;
 753                if (find_actor && !find_actor(inode, ino, opaque))
 754                        continue;
 755                break;
 756        }
 757        return inode;
 758}
 759
 760/*
 761 * This just initializes the inode fields
 762 * to known values before returning the inode..
 763 *
 764 * i_sb, i_ino, i_count, i_state and the lists have
 765 * been initialized elsewhere..
 766 */
 767static void clean_inode(struct inode *inode)
 768{
 769        static struct address_space_operations empty_aops;
 770        static struct inode_operations empty_iops;
 771        static struct file_operations empty_fops;
 772        memset(&inode->u, 0, sizeof(inode->u));
 773        inode->i_sock = 0;
 774        inode->i_op = &empty_iops;
 775        inode->i_fop = &empty_fops;
 776        inode->i_nlink = 1;
 777        atomic_set(&inode->i_writecount, 0);
 778        inode->i_size = 0;
 779        inode->i_blocks = 0;
 780        inode->i_generation = 0;
 781        memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 782        inode->i_pipe = NULL;
 783        inode->i_bdev = NULL;
 784        inode->i_cdev = NULL;
 785        inode->i_data.a_ops = &empty_aops;
 786        inode->i_data.host = inode;
 787        inode->i_data.gfp_mask = GFP_HIGHUSER;
 788        inode->i_mapping = &inode->i_data;
 789}
 790
 791/**
 792 * get_empty_inode      - obtain an inode
 793 *
 794 * This is called by things like the networking layer
 795 * etc that want to get an inode without any inode
 796 * number, or filesystems that allocate new inodes with
 797 * no pre-existing information.
 798 *
 799 * On a successful return the inode pointer is returned. On a failure
 800 * a %NULL pointer is returned. The returned inode is not on any superblock
 801 * lists.
 802 */
 803 
 804struct inode * get_empty_inode(void)
 805{
 806        static unsigned long last_ino;
 807        struct inode * inode;
 808
 809        spin_lock_prefetch(&inode_lock);
 810        
 811        inode = alloc_inode();
 812        if (inode)
 813        {
 814                spin_lock(&inode_lock);
 815                inodes_stat.nr_inodes++;
 816                list_add(&inode->i_list, &inode_in_use);
 817                inode->i_sb = NULL;
 818                inode->i_dev = 0;
 819                inode->i_blkbits = 0;
 820                inode->i_ino = ++last_ino;
 821                inode->i_flags = 0;
 822                atomic_set(&inode->i_count, 1);
 823                inode->i_state = 0;
 824                spin_unlock(&inode_lock);
 825                clean_inode(inode);
 826        }
 827        return inode;
 828}
 829
 830/*
 831 * This is called without the inode lock held.. Be careful.
 832 *
 833 * We no longer cache the sb_flags in i_flags - see fs.h
 834 *      -- rmk@arm.uk.linux.org
 835 */
 836static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
 837{
 838        struct inode * inode;
 839
 840        inode = alloc_inode();
 841        if (inode) {
 842                struct inode * old;
 843
 844                spin_lock(&inode_lock);
 845                /* We released the lock, so.. */
 846                old = find_inode(sb, ino, head, find_actor, opaque);
 847                if (!old) {
 848                        inodes_stat.nr_inodes++;
 849                        list_add(&inode->i_list, &inode_in_use);
 850                        list_add(&inode->i_hash, head);
 851                        inode->i_sb = sb;
 852                        inode->i_dev = sb->s_dev;
 853                        inode->i_blkbits = sb->s_blocksize_bits;
 854                        inode->i_ino = ino;
 855                        inode->i_flags = 0;
 856                        atomic_set(&inode->i_count, 1);
 857                        inode->i_state = I_LOCK;
 858                        spin_unlock(&inode_lock);
 859
 860                        clean_inode(inode);
 861
 862                        /* reiserfs specific hack right here.  We don't
 863                        ** want this to last, and are looking for VFS changes
 864                        ** that will allow us to get rid of it.
 865                        ** -- mason@suse.com 
 866                        */
 867                        if (sb->s_op->read_inode2) {
 868                                sb->s_op->read_inode2(inode, opaque) ;
 869                        } else {
 870                                sb->s_op->read_inode(inode);
 871                        }
 872
 873                        /*
 874                         * This is special!  We do not need the spinlock
 875                         * when clearing I_LOCK, because we're guaranteed
 876                         * that nobody else tries to do anything about the
 877                         * state of the inode when it is locked, as we
 878                         * just created it (so there can be no old holders
 879                         * that haven't tested I_LOCK).
 880                         */
 881                        inode->i_state &= ~I_LOCK;
 882                        wake_up(&inode->i_wait);
 883
 884                        return inode;
 885                }
 886
 887                /*
 888                 * Uhhuh, somebody else created the same inode under
 889                 * us. Use the old inode instead of the one we just
 890                 * allocated.
 891                 */
 892                __iget(old);
 893                spin_unlock(&inode_lock);
 894                destroy_inode(inode);
 895                inode = old;
 896                wait_on_inode(inode);
 897        }
 898        return inode;
 899}
 900
 901static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
 902{
 903        unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES);
 904        tmp = tmp + (tmp >> I_HASHBITS);
 905        return tmp & I_HASHMASK;
 906}
 907
 908/* Yeah, I know about quadratic hash. Maybe, later. */
 909
 910/**
 911 *      iunique - get a unique inode number
 912 *      @sb: superblock
 913 *      @max_reserved: highest reserved inode number
 914 *
 915 *      Obtain an inode number that is unique on the system for a given
 916 *      superblock. This is used by file systems that have no natural
 917 *      permanent inode numbering system. An inode number is returned that
 918 *      is higher than the reserved limit but unique.
 919 *
 920 *      BUGS:
 921 *      With a large number of inodes live on the file system this function
 922 *      currently becomes quite slow.
 923 */
 924 
 925ino_t iunique(struct super_block *sb, ino_t max_reserved)
 926{
 927        static ino_t counter = 0;
 928        struct inode *inode;
 929        struct list_head * head;
 930        ino_t res;
 931        spin_lock(&inode_lock);
 932retry:
 933        if (counter > max_reserved) {
 934                head = inode_hashtable + hash(sb,counter);
 935                inode = find_inode(sb, res = counter++, head, NULL, NULL);
 936                if (!inode) {
 937                        spin_unlock(&inode_lock);
 938                        return res;
 939                }
 940        } else {
 941                counter = max_reserved + 1;
 942        }
 943        goto retry;
 944        
 945}
 946
 947struct inode *igrab(struct inode *inode)
 948{
 949        spin_lock(&inode_lock);
 950        if (!(inode->i_state & I_FREEING))
 951                __iget(inode);
 952        else
 953                /*
 954                 * Handle the case where s_op->clear_inode is not been
 955                 * called yet, and somebody is calling igrab
 956                 * while the inode is getting freed.
 957                 */
 958                inode = NULL;
 959        spin_unlock(&inode_lock);
 960        return inode;
 961}
 962
 963
 964struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
 965{
 966        struct list_head * head = inode_hashtable + hash(sb,ino);
 967        struct inode * inode;
 968
 969        spin_lock(&inode_lock);
 970        inode = find_inode(sb, ino, head, find_actor, opaque);
 971        if (inode) {
 972                __iget(inode);
 973                spin_unlock(&inode_lock);
 974                wait_on_inode(inode);
 975                return inode;
 976        }
 977        spin_unlock(&inode_lock);
 978
 979        /*
 980         * get_new_inode() will do the right thing, re-trying the search
 981         * in case it had to block at any point.
 982         */
 983        return get_new_inode(sb, ino, head, find_actor, opaque);
 984}
 985
 986/**
 987 *      insert_inode_hash - hash an inode
 988 *      @inode: unhashed inode
 989 *
 990 *      Add an inode to the inode hash for this superblock. If the inode
 991 *      has no superblock it is added to a separate anonymous chain.
 992 */
 993 
 994void insert_inode_hash(struct inode *inode)
 995{
 996        struct list_head *head = &anon_hash_chain;
 997        if (inode->i_sb)
 998                head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
 999        spin_lock(&inode_lock);
1000        list_add(&inode->i_hash, head);
1001        spin_unlock(&inode_lock);
1002}
1003
1004/**
1005 *      remove_inode_hash - remove an inode from the hash
1006 *      @inode: inode to unhash
1007 *
1008 *      Remove an inode from the superblock or anonymous hash.
1009 */
1010 
1011void remove_inode_hash(struct inode *inode)
1012{
1013        spin_lock(&inode_lock);
1014        list_del(&inode->i_hash);
1015        INIT_LIST_HEAD(&inode->i_hash);
1016        spin_unlock(&inode_lock);
1017}
1018
1019/**
1020 *      iput    - put an inode 
1021 *      @inode: inode to put
1022 *
1023 *      Puts an inode, dropping its usage count. If the inode use count hits
1024 *      zero the inode is also then freed and may be destroyed.
1025 */
1026 
1027void iput(struct inode *inode)
1028{
1029        if (inode) {
1030                struct super_block *sb = inode->i_sb;
1031                struct super_operations *op = NULL;
1032
1033                if (inode->i_state == I_CLEAR)
1034                        BUG();
1035
1036                if (sb && sb->s_op)
1037                        op = sb->s_op;
1038                if (op && op->put_inode)
1039                        op->put_inode(inode);
1040
1041                if (!atomic_dec_and_lock(&inode->i_count, &inode_lock))
1042                        return;
1043
1044                if (!inode->i_nlink) {
1045                        list_del(&inode->i_hash);
1046                        INIT_LIST_HEAD(&inode->i_hash);
1047                        list_del(&inode->i_list);
1048                        INIT_LIST_HEAD(&inode->i_list);
1049                        inode->i_state|=I_FREEING;
1050                        inodes_stat.nr_inodes--;
1051                        spin_unlock(&inode_lock);
1052
1053                        if (inode->i_data.nrpages)
1054                                truncate_inode_pages(&inode->i_data, 0);
1055
1056                        if (op && op->delete_inode) {
1057                                void (*delete)(struct inode *) = op->delete_inode;
1058                                if (!is_bad_inode(inode))
1059                                        DQUOT_INIT(inode);
1060                                /* s_op->delete_inode internally recalls clear_inode() */
1061                                delete(inode);
1062                        } else
1063                                clear_inode(inode);
1064                        if (inode->i_state != I_CLEAR)
1065                                BUG();
1066                } else {
1067                        if (!list_empty(&inode->i_hash)) {
1068                                if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
1069                                        list_del(&inode->i_list);
1070                                        list_add(&inode->i_list, &inode_unused);
1071                                }
1072                                inodes_stat.nr_unused++;
1073                                spin_unlock(&inode_lock);
1074                                if (!sb || (sb->s_flags & MS_ACTIVE))
1075                                        return;
1076                                write_inode_now(inode, 1);
1077                                spin_lock(&inode_lock);
1078                                inodes_stat.nr_unused--;
1079                                list_del_init(&inode->i_hash);
1080                        }
1081                        list_del_init(&inode->i_list);
1082                        inode->i_state|=I_FREEING;
1083                        inodes_stat.nr_inodes--;
1084                        spin_unlock(&inode_lock);
1085                        if (inode->i_data.nrpages)
1086                                truncate_inode_pages(&inode->i_data, 0);
1087                        clear_inode(inode);
1088                }
1089                destroy_inode(inode);
1090        }
1091}
1092
1093void force_delete(struct inode *inode)
1094{
1095        /*
1096         * Kill off unused inodes ... iput() will unhash and
1097         * delete the inode if we set i_nlink to zero.
1098         */
1099        if (atomic_read(&inode->i_count) == 1)
1100                inode->i_nlink = 0;
1101}
1102
1103/**
1104 *      bmap    - find a block number in a file
1105 *      @inode: inode of file
1106 *      @block: block to find
1107 *
1108 *      Returns the block number on the device holding the inode that
1109 *      is the disk block number for the block of the file requested.
1110 *      That is, asked for block 4 of inode 1 the function will return the
1111 *      disk block relative to the disk start that holds that block of the 
1112 *      file.
1113 */
1114 
1115int bmap(struct inode * inode, int block)
1116{
1117        int res = 0;
1118        if (inode->i_mapping->a_ops->bmap)
1119                res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
1120        return res;
1121}
1122
1123/*
1124 * Initialize the hash tables.
1125 */
1126void __init inode_init(unsigned long mempages)
1127{
1128        struct list_head *head;
1129        unsigned long order;
1130        unsigned int nr_hash;
1131        int i;
1132
1133        mempages >>= (14 - PAGE_SHIFT);
1134        mempages *= sizeof(struct list_head);
1135        for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
1136                ;
1137
1138        do {
1139                unsigned long tmp;
1140
1141                nr_hash = (1UL << order) * PAGE_SIZE /
1142                        sizeof(struct list_head);
1143                i_hash_mask = (nr_hash - 1);
1144
1145                tmp = nr_hash;
1146                i_hash_shift = 0;
1147                while ((tmp >>= 1UL) != 0UL)
1148                        i_hash_shift++;
1149
1150                inode_hashtable = (struct list_head *)
1151                        __get_free_pages(GFP_ATOMIC, order);
1152        } while (inode_hashtable == NULL && --order >= 0);
1153
1154        printk(KERN_INFO "Inode cache hash table entries: %d (order: %ld, %ld bytes)\n",
1155                        nr_hash, order, (PAGE_SIZE << order));
1156
1157        if (!inode_hashtable)
1158                panic("Failed to allocate inode hash table\n");
1159
1160        head = inode_hashtable;
1161        i = nr_hash;
1162        do {
1163                INIT_LIST_HEAD(head);
1164                head++;
1165                i--;
1166        } while (i);
1167
1168        /* inode slab cache */
1169        inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
1170                                         0, SLAB_HWCACHE_ALIGN, init_once,
1171                                         NULL);
1172        if (!inode_cachep)
1173                panic("cannot create inode slab cache");
1174
1175        unused_inodes_flush_task.routine = try_to_sync_unused_inodes;
1176}
1177
1178/**
1179 *      update_atime    -       update the access time
1180 *      @inode: inode accessed
1181 *
1182 *      Update the accessed time on an inode and mark it for writeback.
1183 *      This function automatically handles read only file systems and media,
1184 *      as well as the "noatime" flag and inode specific "noatime" markers.
1185 */
1186 
1187void update_atime (struct inode *inode)
1188{
1189        if (inode->i_atime == CURRENT_TIME)
1190                return;
1191        if ( IS_NOATIME (inode) ) return;
1192        if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
1193        if ( IS_RDONLY (inode) ) return;
1194        inode->i_atime = CURRENT_TIME;
1195        mark_inode_dirty_sync (inode);
1196}   /*  End Function update_atime  */
1197
1198
1199/*
1200 *      Quota functions that want to walk the inode lists..
1201 */
1202#ifdef CONFIG_QUOTA
1203
1204/* Functions back in dquot.c */
1205void put_dquot_list(struct list_head *);
1206int remove_inode_dquot_ref(struct inode *, short, struct list_head *);
1207
1208void remove_dquot_ref(struct super_block *sb, short type)
1209{
1210        struct inode *inode;
1211        struct list_head *act_head;
1212        LIST_HEAD(tofree_head);
1213
1214        if (!sb->dq_op)
1215                return; /* nothing to do */
1216        /* We have to be protected against other CPUs */
1217        lock_kernel();          /* This lock is for quota code */
1218        spin_lock(&inode_lock); /* This lock is for inodes code */
1219 
1220        list_for_each(act_head, &inode_in_use) {
1221                inode = list_entry(act_head, struct inode, i_list);
1222                if (inode->i_sb == sb && IS_QUOTAINIT(inode))
1223                        remove_inode_dquot_ref(inode, type, &tofree_head);
1224        }
1225        list_for_each(act_head, &inode_unused) {
1226                inode = list_entry(act_head, struct inode, i_list);
1227                if (inode->i_sb == sb && IS_QUOTAINIT(inode))
1228                        remove_inode_dquot_ref(inode, type, &tofree_head);
1229        }
1230        list_for_each(act_head, &sb->s_dirty) {
1231                inode = list_entry(act_head, struct inode, i_list);
1232                if (IS_QUOTAINIT(inode))
1233                        remove_inode_dquot_ref(inode, type, &tofree_head);
1234        }
1235        list_for_each(act_head, &sb->s_locked_inodes) {
1236                inode = list_entry(act_head, struct inode, i_list);
1237                if (IS_QUOTAINIT(inode))
1238                        remove_inode_dquot_ref(inode, type, &tofree_head);
1239        }
1240        spin_unlock(&inode_lock);
1241        unlock_kernel();
1242
1243        put_dquot_list(&tofree_head);
1244}
1245
1246#endif
1247
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.