linux-bk/fs/inode.c
<<
>>
Prefs
   1/*
   2 * linux/fs/inode.c
   3 *
   4 * (C) 1997 Linus Torvalds
   5 */
   6
   7#include <linux/config.h>
   8#include <linux/fs.h>
   9#include <linux/mm.h>
  10#include <linux/dcache.h>
  11#include <linux/init.h>
  12#include <linux/quotaops.h>
  13#include <linux/slab.h>
  14#include <linux/writeback.h>
  15#include <linux/module.h>
  16#include <linux/backing-dev.h>
  17#include <linux/wait.h>
  18#include <linux/hash.h>
  19#include <linux/security.h>
  20
  21/*
  22 * This is needed for the following functions:
  23 *  - inode_has_buffers
  24 *  - invalidate_inode_buffers
  25 *  - fsync_bdev
  26 *  - invalidate_bdev
  27 *
  28 * FIXME: remove all knowledge of the buffer layer from this file
  29 */
  30#include <linux/buffer_head.h>
  31
  32/*
  33 * New inode.c implementation.
  34 *
  35 * This implementation has the basic premise of trying
  36 * to be extremely low-overhead and SMP-safe, yet be
  37 * simple enough to be "obviously correct".
  38 *
  39 * Famous last words.
  40 */
  41
  42/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
  43
  44/* #define INODE_PARANOIA 1 */
  45/* #define INODE_DEBUG 1 */
  46
  47/*
  48 * Inode lookup is no longer as critical as it used to be:
  49 * most of the lookups are going to be through the dcache.
  50 */
  51#define I_HASHBITS      i_hash_shift
  52#define I_HASHMASK      i_hash_mask
  53
  54static unsigned int i_hash_mask;
  55static unsigned int i_hash_shift;
  56
  57/*
  58 * Each inode can be on two separate lists. One is
  59 * the hash list of the inode, used for lookups. The
  60 * other linked list is the "type" list:
  61 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
  62 *  "dirty"  - as "in_use" but also dirty
  63 *  "unused" - valid inode, i_count = 0
  64 *
  65 * A "dirty" list is maintained for each super block,
  66 * allowing for low-overhead inode sync() operations.
  67 */
  68
  69LIST_HEAD(inode_in_use);
  70LIST_HEAD(inode_unused);
  71static struct list_head *inode_hashtable;
  72static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */
  73
  74/*
  75 * A simple spinlock to protect the list manipulations.
  76 *
  77 * NOTE! You also have to own the lock if you change
  78 * the i_state of an inode while it is in use..
  79 */
  80spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
  81
  82/*
  83 * Statistics gathering..
  84 */
  85struct inodes_stat_t inodes_stat;
  86
  87static kmem_cache_t * inode_cachep;
  88
  89static struct inode *alloc_inode(struct super_block *sb)
  90{
  91        static struct address_space_operations empty_aops;
  92        static struct inode_operations empty_iops;
  93        static struct file_operations empty_fops;
  94        struct inode *inode;
  95
  96        if (sb->s_op->alloc_inode)
  97                inode = sb->s_op->alloc_inode(sb);
  98        else
  99                inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
 100
 101        if (inode) {
 102                struct address_space * const mapping = &inode->i_data;
 103
 104                inode->i_security = NULL;
 105                if (security_ops->inode_alloc_security(inode)) {
 106                        if (inode->i_sb->s_op->destroy_inode)
 107                                inode->i_sb->s_op->destroy_inode(inode);
 108                        else
 109                                kmem_cache_free(inode_cachep, (inode));
 110                        return NULL;
 111                }
 112                inode->i_sb = sb;
 113                inode->i_dev = sb->s_dev;
 114                inode->i_blkbits = sb->s_blocksize_bits;
 115                inode->i_flags = 0;
 116                atomic_set(&inode->i_count, 1);
 117                inode->i_sock = 0;
 118                inode->i_op = &empty_iops;
 119                inode->i_fop = &empty_fops;
 120                inode->i_nlink = 1;
 121                atomic_set(&inode->i_writecount, 0);
 122                inode->i_size = 0;
 123                inode->i_blocks = 0;
 124                inode->i_bytes = 0;
 125                inode->i_generation = 0;
 126                memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 127                inode->i_pipe = NULL;
 128                inode->i_bdev = NULL;
 129                inode->i_cdev = NULL;
 130
 131                mapping->a_ops = &empty_aops;
 132                mapping->host = inode;
 133                mapping->gfp_mask = GFP_HIGHUSER;
 134                mapping->dirtied_when = 0;
 135                mapping->assoc_mapping = NULL;
 136                mapping->backing_dev_info = &default_backing_dev_info;
 137                if (sb->s_bdev)
 138                        inode->i_data.backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
 139                memset(&inode->u, 0, sizeof(inode->u));
 140                inode->i_mapping = mapping;
 141        }
 142        return inode;
 143}
 144
 145static void destroy_inode(struct inode *inode) 
 146{
 147        if (inode_has_buffers(inode))
 148                BUG();
 149        security_ops->inode_free_security(inode);
 150        if (inode->i_sb->s_op->destroy_inode)
 151                inode->i_sb->s_op->destroy_inode(inode);
 152        else
 153                kmem_cache_free(inode_cachep, (inode));
 154}
 155
 156
 157/*
 158 * These are initializations that only need to be done
 159 * once, because the fields are idempotent across use
 160 * of the inode, so let the slab aware of that.
 161 */
 162void inode_init_once(struct inode *inode)
 163{
 164        memset(inode, 0, sizeof(*inode));
 165        INIT_LIST_HEAD(&inode->i_hash);
 166        INIT_LIST_HEAD(&inode->i_data.clean_pages);
 167        INIT_LIST_HEAD(&inode->i_data.dirty_pages);
 168        INIT_LIST_HEAD(&inode->i_data.locked_pages);
 169        INIT_LIST_HEAD(&inode->i_data.io_pages);
 170        INIT_LIST_HEAD(&inode->i_dentry);
 171        INIT_LIST_HEAD(&inode->i_devices);
 172        sema_init(&inode->i_sem, 1);
 173        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 174        rwlock_init(&inode->i_data.page_lock);
 175        spin_lock_init(&inode->i_data.i_shared_lock);
 176        INIT_LIST_HEAD(&inode->i_data.private_list);
 177        spin_lock_init(&inode->i_data.private_lock);
 178        INIT_LIST_HEAD(&inode->i_data.i_mmap);
 179        INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
 180}
 181
 182static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 183{
 184        struct inode * inode = (struct inode *) foo;
 185
 186        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 187            SLAB_CTOR_CONSTRUCTOR)
 188                inode_init_once(inode);
 189}
 190
 191/*
 192 * inode_lock must be held
 193 */
 194void __iget(struct inode * inode)
 195{
 196        if (atomic_read(&inode->i_count)) {
 197                atomic_inc(&inode->i_count);
 198                return;
 199        }
 200        atomic_inc(&inode->i_count);
 201        if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 202                list_del(&inode->i_list);
 203                list_add(&inode->i_list, &inode_in_use);
 204        }
 205        inodes_stat.nr_unused--;
 206}
 207
 208/**
 209 * clear_inode - clear an inode
 210 * @inode: inode to clear
 211 *
 212 * This is called by the filesystem to tell us
 213 * that the inode is no longer useful. We just
 214 * terminate it with extreme prejudice.
 215 */
 216 
 217void clear_inode(struct inode *inode)
 218{
 219        invalidate_inode_buffers(inode);
 220       
 221        if (inode->i_data.nrpages)
 222                BUG();
 223        if (!(inode->i_state & I_FREEING))
 224                BUG();
 225        if (inode->i_state & I_CLEAR)
 226                BUG();
 227        wait_on_inode(inode);
 228        DQUOT_DROP(inode);
 229        if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
 230                inode->i_sb->s_op->clear_inode(inode);
 231        if (inode->i_bdev)
 232                bd_forget(inode);
 233        else if (inode->i_cdev) {
 234                cdput(inode->i_cdev);
 235                inode->i_cdev = NULL;
 236        }
 237        inode->i_state = I_CLEAR;
 238}
 239
 240/*
 241 * Dispose-list gets a local list with local inodes in it, so it doesn't
 242 * need to worry about list corruption and SMP locks.
 243 */
 244static void dispose_list(struct list_head * head)
 245{
 246        struct list_head * inode_entry;
 247        struct inode * inode;
 248
 249        while ((inode_entry = head->next) != head)
 250        {
 251                list_del(inode_entry);
 252
 253                inode = list_entry(inode_entry, struct inode, i_list);
 254                if (inode->i_data.nrpages)
 255                        truncate_inode_pages(&inode->i_data, 0);
 256                clear_inode(inode);
 257                destroy_inode(inode);
 258                inodes_stat.nr_inodes--;
 259        }
 260}
 261
 262/*
 263 * Invalidate all inodes for a device.
 264 */
 265static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
 266{
 267        struct list_head *next;
 268        int busy = 0, count = 0;
 269
 270        next = head->next;
 271        for (;;) {
 272                struct list_head * tmp = next;
 273                struct inode * inode;
 274
 275                next = next->next;
 276                if (tmp == head)
 277                        break;
 278                inode = list_entry(tmp, struct inode, i_list);
 279                if (inode->i_sb != sb)
 280                        continue;
 281                invalidate_inode_buffers(inode);
 282                if (!atomic_read(&inode->i_count)) {
 283                        list_del_init(&inode->i_hash);
 284                        list_del(&inode->i_list);
 285                        list_add(&inode->i_list, dispose);
 286                        inode->i_state |= I_FREEING;
 287                        count++;
 288                        continue;
 289                }
 290                busy = 1;
 291        }
 292        /* only unused inodes may be cached with i_count zero */
 293        inodes_stat.nr_unused -= count;
 294        return busy;
 295}
 296
 297/*
 298 * This is a two-stage process. First we collect all
 299 * offending inodes onto the throw-away list, and in
 300 * the second stage we actually dispose of them. This
 301 * is because we don't want to sleep while messing
 302 * with the global lists..
 303 */
 304 
 305/**
 306 *      invalidate_inodes       - discard the inodes on a device
 307 *      @sb: superblock
 308 *
 309 *      Discard all of the inodes for a given superblock. If the discard
 310 *      fails because there are busy inodes then a non zero value is returned.
 311 *      If the discard is successful all the inodes have been discarded.
 312 */
 313 
 314int invalidate_inodes(struct super_block * sb)
 315{
 316        int busy;
 317        LIST_HEAD(throw_away);
 318
 319        spin_lock(&inode_lock);
 320        busy = invalidate_list(&inode_in_use, sb, &throw_away);
 321        busy |= invalidate_list(&inode_unused, sb, &throw_away);
 322        busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
 323        busy |= invalidate_list(&sb->s_io, sb, &throw_away);
 324        spin_unlock(&inode_lock);
 325
 326        dispose_list(&throw_away);
 327
 328        return busy;
 329}
 330 
 331int invalidate_device(kdev_t dev, int do_sync)
 332{
 333        struct super_block *sb;
 334        struct block_device *bdev = bdget(kdev_t_to_nr(dev));
 335        int res;
 336
 337        if (!bdev)
 338                return 0;
 339
 340        if (do_sync)
 341                fsync_bdev(bdev);
 342
 343        res = 0;
 344        sb = get_super(bdev);
 345        if (sb) {
 346                /*
 347                 * no need to lock the super, get_super holds the
 348                 * read semaphore so the filesystem cannot go away
 349                 * under us (->put_super runs with the write lock
 350                 * hold).
 351                 */
 352                shrink_dcache_sb(sb);
 353                res = invalidate_inodes(sb);
 354                drop_super(sb);
 355        }
 356        invalidate_bdev(bdev, 0);
 357        bdput(bdev);
 358        return res;
 359}
 360
 361
 362/*
 363 * This is called with the inode lock held. It searches
 364 * the in-use for freeable inodes, which are moved to a
 365 * temporary list and then placed on the unused list by
 366 * dispose_list. 
 367 *
 368 * We don't expect to have to call this very often.
 369 *
 370 * N.B. The spinlock is released during the call to
 371 *      dispose_list.
 372 */
 373#define CAN_UNUSE(inode) \
 374        ((((inode)->i_state | (inode)->i_data.nrpages) == 0)  && \
 375         !inode_has_buffers(inode))
 376#define INODE(entry)    (list_entry(entry, struct inode, i_list))
 377
 378void prune_icache(int goal)
 379{
 380        LIST_HEAD(list);
 381        struct list_head *entry, *freeable = &list;
 382        int count;
 383        struct inode * inode;
 384
 385        spin_lock(&inode_lock);
 386
 387        count = 0;
 388        entry = inode_unused.prev;
 389        while (entry != &inode_unused)
 390        {
 391                struct list_head *tmp = entry;
 392
 393                entry = entry->prev;
 394                inode = INODE(tmp);
 395                if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
 396                        continue;
 397                if (!CAN_UNUSE(inode))
 398                        continue;
 399                if (atomic_read(&inode->i_count))
 400                        continue;
 401                list_del(tmp);
 402                list_del_init(&inode->i_hash);
 403                list_add(tmp, freeable);
 404                inode->i_state |= I_FREEING;
 405                count++;
 406                if (!--goal)
 407                        break;
 408        }
 409        inodes_stat.nr_unused -= count;
 410        spin_unlock(&inode_lock);
 411
 412        dispose_list(freeable);
 413}
 414
 415/*
 416 * This is called from kswapd when we think we need some
 417 * more memory, but aren't really sure how much. So we
 418 * carefully try to free a _bit_ of our icache, but not
 419 * too much.
 420 *
 421 * Priority:
 422 *   1 - very urgent: shrink everything
 423 *  ...
 424 *   6 - base-level: try to shrink a bit.
 425 */
 426int shrink_icache_memory(int priority, int gfp_mask)
 427{
 428        int count = 0;
 429
 430        /*
 431         * Nasty deadlock avoidance..
 432         *
 433         * We may hold various FS locks, and we don't
 434         * want to recurse into the FS that called us
 435         * in clear_inode() and friends..
 436         */
 437        if (!(gfp_mask & __GFP_FS))
 438                return 0;
 439
 440        count = inodes_stat.nr_unused / priority;
 441
 442        prune_icache(count);
 443        kmem_cache_shrink(inode_cachep);
 444        return 0;
 445}
 446
 447/*
 448 * Called with the inode lock held.
 449 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 450 * by hand after calling find_inode now! This simplifies iunique and won't
 451 * add any additional branch in the common code.
 452 */
 453static struct inode * find_inode(struct super_block * sb, struct list_head *head, int (*test)(struct inode *, void *), void *data)
 454{
 455        struct list_head *tmp;
 456        struct inode * inode;
 457
 458        tmp = head;
 459        for (;;) {
 460                tmp = tmp->next;
 461                inode = NULL;
 462                if (tmp == head)
 463                        break;
 464                inode = list_entry(tmp, struct inode, i_hash);
 465                if (inode->i_sb != sb)
 466                        continue;
 467                if (!test(inode, data))
 468                        continue;
 469                break;
 470        }
 471        return inode;
 472}
 473
 474/*
 475 * find_inode_fast is the fast path version of find_inode, see the comment at
 476 * iget_locked for details.
 477 */
 478static struct inode * find_inode_fast(struct super_block * sb, struct list_head *head, unsigned long ino)
 479{
 480        struct list_head *tmp;
 481        struct inode * inode;
 482
 483        tmp = head;
 484        for (;;) {
 485                tmp = tmp->next;
 486                inode = NULL;
 487                if (tmp == head)
 488                        break;
 489                inode = list_entry(tmp, struct inode, i_hash);
 490                if (inode->i_ino != ino)
 491                        continue;
 492                if (inode->i_sb != sb)
 493                        continue;
 494                break;
 495        }
 496        return inode;
 497}
 498
 499/**
 500 *      new_inode       - obtain an inode
 501 *      @sb: superblock
 502 *
 503 *      Allocates a new inode for given superblock.
 504 */
 505 
 506struct inode *new_inode(struct super_block *sb)
 507{
 508        static unsigned long last_ino;
 509        struct inode * inode;
 510
 511        spin_lock_prefetch(&inode_lock);
 512        
 513        inode = alloc_inode(sb);
 514        if (inode) {
 515                spin_lock(&inode_lock);
 516                inodes_stat.nr_inodes++;
 517                list_add(&inode->i_list, &inode_in_use);
 518                inode->i_ino = ++last_ino;
 519                inode->i_state = 0;
 520                spin_unlock(&inode_lock);
 521        }
 522        return inode;
 523}
 524
 525void unlock_new_inode(struct inode *inode)
 526{
 527        /*
 528         * This is special!  We do not need the spinlock
 529         * when clearing I_LOCK, because we're guaranteed
 530         * that nobody else tries to do anything about the
 531         * state of the inode when it is locked, as we
 532         * just created it (so there can be no old holders
 533         * that haven't tested I_LOCK).
 534         */
 535        inode->i_state &= ~(I_LOCK|I_NEW);
 536        wake_up_inode(inode);
 537}
 538
 539
 540/*
 541 * This is called without the inode lock held.. Be careful.
 542 *
 543 * We no longer cache the sb_flags in i_flags - see fs.h
 544 *      -- rmk@arm.uk.linux.org
 545 */
 546static struct inode * get_new_inode(struct super_block *sb, struct list_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data)
 547{
 548        struct inode * inode;
 549
 550        inode = alloc_inode(sb);
 551        if (inode) {
 552                struct inode * old;
 553
 554                spin_lock(&inode_lock);
 555                /* We released the lock, so.. */
 556                old = find_inode(sb, head, test, data);
 557                if (!old) {
 558                        if (set(inode, data))
 559                                goto set_failed;
 560
 561                        inodes_stat.nr_inodes++;
 562                        list_add(&inode->i_list, &inode_in_use);
 563                        list_add(&inode->i_hash, head);
 564                        inode->i_state = I_LOCK|I_NEW;
 565                        spin_unlock(&inode_lock);
 566
 567                        /* Return the locked inode with I_NEW set, the
 568                         * caller is responsible for filling in the contents
 569                         */
 570                        return inode;
 571                }
 572
 573                /*
 574                 * Uhhuh, somebody else created the same inode under
 575                 * us. Use the old inode instead of the one we just
 576                 * allocated.
 577                 */
 578                __iget(old);
 579                spin_unlock(&inode_lock);
 580                destroy_inode(inode);
 581                inode = old;
 582                wait_on_inode(inode);
 583        }
 584        return inode;
 585
 586set_failed:
 587        spin_unlock(&inode_lock);
 588        destroy_inode(inode);
 589        return NULL;
 590}
 591
 592/*
 593 * get_new_inode_fast is the fast path version of get_new_inode, see the
 594 * comment at iget_locked for details.
 595 */
 596static struct inode * get_new_inode_fast(struct super_block *sb, struct list_head *head, unsigned long ino)
 597{
 598        struct inode * inode;
 599
 600        inode = alloc_inode(sb);
 601        if (inode) {
 602                struct inode * old;
 603
 604                spin_lock(&inode_lock);
 605                /* We released the lock, so.. */
 606                old = find_inode_fast(sb, head, ino);
 607                if (!old) {
 608                        inode->i_ino = ino;
 609                        inodes_stat.nr_inodes++;
 610                        list_add(&inode->i_list, &inode_in_use);
 611                        list_add(&inode->i_hash, head);
 612                        inode->i_state = I_LOCK|I_NEW;
 613                        spin_unlock(&inode_lock);
 614
 615                        /* Return the locked inode with I_NEW set, the
 616                         * caller is responsible for filling in the contents
 617                         */
 618                        return inode;
 619                }
 620
 621                /*
 622                 * Uhhuh, somebody else created the same inode under
 623                 * us. Use the old inode instead of the one we just
 624                 * allocated.
 625                 */
 626                __iget(old);
 627                spin_unlock(&inode_lock);
 628                destroy_inode(inode);
 629                inode = old;
 630                wait_on_inode(inode);
 631        }
 632        return inode;
 633}
 634
 635static inline unsigned long hash(struct super_block *sb, unsigned long hashval)
 636{
 637        unsigned long tmp = hashval + ((unsigned long) sb / L1_CACHE_BYTES);
 638        tmp = tmp + (tmp >> I_HASHBITS);
 639        return tmp & I_HASHMASK;
 640}
 641
 642/* Yeah, I know about quadratic hash. Maybe, later. */
 643
 644/**
 645 *      iunique - get a unique inode number
 646 *      @sb: superblock
 647 *      @max_reserved: highest reserved inode number
 648 *
 649 *      Obtain an inode number that is unique on the system for a given
 650 *      superblock. This is used by file systems that have no natural
 651 *      permanent inode numbering system. An inode number is returned that
 652 *      is higher than the reserved limit but unique.
 653 *
 654 *      BUGS:
 655 *      With a large number of inodes live on the file system this function
 656 *      currently becomes quite slow.
 657 */
 658 
 659ino_t iunique(struct super_block *sb, ino_t max_reserved)
 660{
 661        static ino_t counter = 0;
 662        struct inode *inode;
 663        struct list_head * head;
 664        ino_t res;
 665        spin_lock(&inode_lock);
 666retry:
 667        if (counter > max_reserved) {
 668                head = inode_hashtable + hash(sb,counter);
 669                res = counter++;
 670                inode = find_inode_fast(sb, head, res);
 671                if (!inode) {
 672                        spin_unlock(&inode_lock);
 673                        return res;
 674                }
 675        } else {
 676                counter = max_reserved + 1;
 677        }
 678        goto retry;
 679        
 680}
 681
 682struct inode *igrab(struct inode *inode)
 683{
 684        spin_lock(&inode_lock);
 685        if (!(inode->i_state & I_FREEING))
 686                __iget(inode);
 687        else
 688                /*
 689                 * Handle the case where s_op->clear_inode is not been
 690                 * called yet, and somebody is calling igrab
 691                 * while the inode is getting freed.
 692                 */
 693                inode = NULL;
 694        spin_unlock(&inode_lock);
 695        return inode;
 696}
 697
 698/*
 699 * This is iget without the read_inode portion of get_new_inode
 700 * the filesystem gets back a new locked and hashed inode and gets
 701 * to fill it in before unlocking it via unlock_new_inode().
 702 */
 703struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data)
 704{
 705        struct list_head * head = inode_hashtable + hash(sb, hashval);
 706        struct inode * inode;
 707
 708        spin_lock(&inode_lock);
 709        inode = find_inode(sb, head, test, data);
 710        if (inode) {
 711                __iget(inode);
 712                spin_unlock(&inode_lock);
 713                wait_on_inode(inode);
 714                return inode;
 715        }
 716        spin_unlock(&inode_lock);
 717
 718        /*
 719         * get_new_inode() will do the right thing, re-trying the search
 720         * in case it had to block at any point.
 721         */
 722        return get_new_inode(sb, head, test, set, data);
 723}
 724
 725/*
 726 * Because most filesystems are based on 32-bit unique inode numbers some
 727 * functions are duplicated to keep iget_locked as a fast path. We can avoid
 728 * unnecessary pointer dereferences and function calls for this specific
 729 * case. The duplicated functions (find_inode_fast and get_new_inode_fast)
 730 * have the same pre- and post-conditions as their original counterparts.
 731 */
 732struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 733{
 734        struct list_head * head = inode_hashtable + hash(sb, ino);
 735        struct inode * inode;
 736
 737        spin_lock(&inode_lock);
 738        inode = find_inode_fast(sb, head, ino);
 739        if (inode) {
 740                __iget(inode);
 741                spin_unlock(&inode_lock);
 742                wait_on_inode(inode);
 743                return inode;
 744        }
 745        spin_unlock(&inode_lock);
 746
 747        /*
 748         * get_new_inode_fast() will do the right thing, re-trying the search
 749         * in case it had to block at any point.
 750         */
 751        return get_new_inode_fast(sb, head, ino);
 752}
 753
 754EXPORT_SYMBOL(iget5_locked);
 755EXPORT_SYMBOL(iget_locked);
 756EXPORT_SYMBOL(unlock_new_inode);
 757
 758/**
 759 *      __insert_inode_hash - hash an inode
 760 *      @inode: unhashed inode
 761 *      @hashval: unsigned long value used to locate this object in the
 762 *              inode_hashtable.
 763 *
 764 *      Add an inode to the inode hash for this superblock. If the inode
 765 *      has no superblock it is added to a separate anonymous chain.
 766 */
 767 
 768void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 769{
 770        struct list_head *head = &anon_hash_chain;
 771        if (inode->i_sb)
 772                head = inode_hashtable + hash(inode->i_sb, hashval);
 773        spin_lock(&inode_lock);
 774        list_add(&inode->i_hash, head);
 775        spin_unlock(&inode_lock);
 776}
 777
 778/**
 779 *      remove_inode_hash - remove an inode from the hash
 780 *      @inode: inode to unhash
 781 *
 782 *      Remove an inode from the superblock or anonymous hash.
 783 */
 784 
 785void remove_inode_hash(struct inode *inode)
 786{
 787        spin_lock(&inode_lock);
 788        list_del_init(&inode->i_hash);
 789        spin_unlock(&inode_lock);
 790}
 791
 792void generic_delete_inode(struct inode *inode)
 793{
 794        struct super_operations *op = inode->i_sb->s_op;
 795
 796        list_del_init(&inode->i_hash);
 797        list_del_init(&inode->i_list);
 798        inode->i_state|=I_FREEING;
 799        inodes_stat.nr_inodes--;
 800        spin_unlock(&inode_lock);
 801
 802        if (inode->i_data.nrpages)
 803                truncate_inode_pages(&inode->i_data, 0);
 804
 805        security_ops->inode_delete(inode);
 806
 807        if (op && op->delete_inode) {
 808                void (*delete)(struct inode *) = op->delete_inode;
 809                if (!is_bad_inode(inode))
 810                        DQUOT_INIT(inode);
 811                /* s_op->delete_inode internally recalls clear_inode() */
 812                delete(inode);
 813        } else
 814                clear_inode(inode);
 815        if (inode->i_state != I_CLEAR)
 816                BUG();
 817        destroy_inode(inode);
 818}
 819EXPORT_SYMBOL(generic_delete_inode);
 820
 821static void generic_forget_inode(struct inode *inode)
 822{
 823        struct super_block *sb = inode->i_sb;
 824
 825        if (!list_empty(&inode->i_hash)) {
 826                if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 827                        list_del(&inode->i_list);
 828                        list_add(&inode->i_list, &inode_unused);
 829                }
 830                inodes_stat.nr_unused++;
 831                spin_unlock(&inode_lock);
 832                if (!sb || (sb->s_flags & MS_ACTIVE))
 833                        return;
 834                write_inode_now(inode, 1);
 835                spin_lock(&inode_lock);
 836                inodes_stat.nr_unused--;
 837                list_del_init(&inode->i_hash);
 838        }
 839        list_del_init(&inode->i_list);
 840        inode->i_state|=I_FREEING;
 841        inodes_stat.nr_inodes--;
 842        spin_unlock(&inode_lock);
 843        if (inode->i_data.nrpages)
 844                truncate_inode_pages(&inode->i_data, 0);
 845        clear_inode(inode);
 846        destroy_inode(inode);
 847}
 848
 849/*
 850 * Normal UNIX filesystem behaviour: delete the
 851 * inode when the usage count drops to zero, and
 852 * i_nlink is zero.
 853 */
 854static void generic_drop_inode(struct inode *inode)
 855{
 856        if (!inode->i_nlink)
 857                generic_delete_inode(inode);
 858        else
 859                generic_forget_inode(inode);
 860}
 861
 862/*
 863 * Called when we're dropping the last reference
 864 * to an inode. 
 865 *
 866 * Call the FS "drop()" function, defaulting to
 867 * the legacy UNIX filesystem behaviour..
 868 *
 869 * NOTE! NOTE! NOTE! We're called with the inode lock
 870 * held, and the drop function is supposed to release
 871 * the lock!
 872 */
 873static inline void iput_final(struct inode *inode)
 874{
 875        struct super_operations *op = inode->i_sb->s_op;
 876        void (*drop)(struct inode *) = generic_drop_inode;
 877
 878        if (op && op->drop_inode)
 879                drop = op->drop_inode;
 880        drop(inode);
 881}
 882
 883/**
 884 *      iput    - put an inode 
 885 *      @inode: inode to put
 886 *
 887 *      Puts an inode, dropping its usage count. If the inode use count hits
 888 *      zero the inode is also then freed and may be destroyed.
 889 */
 890 
 891void iput(struct inode *inode)
 892{
 893        if (inode) {
 894                struct super_operations *op = inode->i_sb->s_op;
 895
 896                if (inode->i_state == I_CLEAR)
 897                        BUG();
 898
 899                if (op && op->put_inode)
 900                        op->put_inode(inode);
 901
 902                if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 903                        iput_final(inode);
 904        }
 905}
 906
 907/**
 908 *      bmap    - find a block number in a file
 909 *      @inode: inode of file
 910 *      @block: block to find
 911 *
 912 *      Returns the block number on the device holding the inode that
 913 *      is the disk block number for the block of the file requested.
 914 *      That is, asked for block 4 of inode 1 the function will return the
 915 *      disk block relative to the disk start that holds that block of the 
 916 *      file.
 917 */
 918 
 919int bmap(struct inode * inode, int block)
 920{
 921        int res = 0;
 922        if (inode->i_mapping->a_ops->bmap)
 923                res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
 924        return res;
 925}
 926
 927/**
 928 *      update_atime    -       update the access time
 929 *      @inode: inode accessed
 930 *
 931 *      Update the accessed time on an inode and mark it for writeback.
 932 *      This function automatically handles read only file systems and media,
 933 *      as well as the "noatime" flag and inode specific "noatime" markers.
 934 */
 935 
 936void update_atime(struct inode *inode)
 937{
 938        if (inode->i_atime == CURRENT_TIME)
 939                return;
 940        if (IS_NOATIME(inode))
 941                return;
 942        if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode))
 943                return;
 944        if (IS_RDONLY(inode))
 945                return;
 946        inode->i_atime = CURRENT_TIME;
 947        mark_inode_dirty_sync(inode);
 948}
 949
 950int inode_needs_sync(struct inode *inode)
 951{
 952        if (IS_SYNC(inode))
 953                return 1;
 954        if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
 955                return 1;
 956        return 0;
 957}
 958EXPORT_SYMBOL(inode_needs_sync);
 959
 960/*
 961 *      Quota functions that want to walk the inode lists..
 962 */
 963#ifdef CONFIG_QUOTA
 964
 965/* Functions back in dquot.c */
 966void put_dquot_list(struct list_head *);
 967int remove_inode_dquot_ref(struct inode *, int, struct list_head *);
 968
 969void remove_dquot_ref(struct super_block *sb, int type)
 970{
 971        struct inode *inode;
 972        struct list_head *act_head;
 973        LIST_HEAD(tofree_head);
 974
 975        if (!sb->dq_op)
 976                return; /* nothing to do */
 977        /* We have to be protected against other CPUs */
 978        lock_kernel();          /* This lock is for quota code */
 979        spin_lock(&inode_lock); /* This lock is for inodes code */
 980 
 981        list_for_each(act_head, &inode_in_use) {
 982                inode = list_entry(act_head, struct inode, i_list);
 983                if (inode->i_sb == sb && IS_QUOTAINIT(inode))
 984                        remove_inode_dquot_ref(inode, type, &tofree_head);
 985        }
 986        list_for_each(act_head, &inode_unused) {
 987                inode = list_entry(act_head, struct inode, i_list);
 988                if (inode->i_sb == sb && IS_QUOTAINIT(inode))
 989                        remove_inode_dquot_ref(inode, type, &tofree_head);
 990        }
 991        list_for_each(act_head, &sb->s_dirty) {
 992                inode = list_entry(act_head, struct inode, i_list);
 993                if (IS_QUOTAINIT(inode))
 994                        remove_inode_dquot_ref(inode, type, &tofree_head);
 995        }
 996        list_for_each(act_head, &sb->s_io) {
 997                inode = list_entry(act_head, struct inode, i_list);
 998                if (IS_QUOTAINIT(inode))
 999                        remove_inode_dquot_ref(inode, type, &tofree_head);
1000        }
1001        spin_unlock(&inode_lock);
1002        unlock_kernel();
1003
1004        put_dquot_list(&tofree_head);
1005}
1006
1007#endif
1008
1009/*
1010 * Hashed waitqueues for wait_on_inode().  The table is pretty small - the
1011 * kernel doesn't lock many inodes at the same time.
1012 */
1013#define I_WAIT_TABLE_ORDER      3
1014static struct i_wait_queue_head {
1015        wait_queue_head_t wqh;
1016} ____cacheline_aligned_in_smp i_wait_queue_heads[1<<I_WAIT_TABLE_ORDER];
1017
1018/*
1019 * Return the address of the waitqueue_head to be used for this inode
1020 */
1021static wait_queue_head_t *i_waitq_head(struct inode *inode)
1022{
1023        return &i_wait_queue_heads[hash_ptr(inode, I_WAIT_TABLE_ORDER)].wqh;
1024}
1025
1026void __wait_on_inode(struct inode *inode)
1027{
1028        DECLARE_WAITQUEUE(wait, current);
1029        wait_queue_head_t *wq = i_waitq_head(inode);
1030
1031        add_wait_queue(wq, &wait);
1032repeat:
1033        set_current_state(TASK_UNINTERRUPTIBLE);
1034        if (inode->i_state & I_LOCK) {
1035                schedule();
1036                goto repeat;
1037        }
1038        remove_wait_queue(wq, &wait);
1039        current->state = TASK_RUNNING;
1040}
1041
1042void wake_up_inode(struct inode *inode)
1043{
1044        wait_queue_head_t *wq = i_waitq_head(inode);
1045
1046        /*
1047         * Prevent speculative execution through spin_unlock(&inode_lock);
1048         */
1049        smp_mb();
1050        if (waitqueue_active(wq))
1051                wake_up_all(wq);
1052}
1053
1054/*
1055 * Initialize the waitqueues and inode hash table.
1056 */
1057void __init inode_init(unsigned long mempages)
1058{
1059        struct list_head *head;
1060        unsigned long order;
1061        unsigned int nr_hash;
1062        int i;
1063
1064        for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
1065                init_waitqueue_head(&i_wait_queue_heads[i].wqh);
1066
1067        mempages >>= (14 - PAGE_SHIFT);
1068        mempages *= sizeof(struct list_head);
1069        for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
1070                ;
1071
1072        do {
1073                unsigned long tmp;
1074
1075                nr_hash = (1UL << order) * PAGE_SIZE /
1076                        sizeof(struct list_head);
1077                i_hash_mask = (nr_hash - 1);
1078
1079                tmp = nr_hash;
1080                i_hash_shift = 0;
1081                while ((tmp >>= 1UL) != 0UL)
1082                        i_hash_shift++;
1083
1084                inode_hashtable = (struct list_head *)
1085                        __get_free_pages(GFP_ATOMIC, order);
1086        } while (inode_hashtable == NULL && --order >= 0);
1087
1088        printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n",
1089                        nr_hash, order, (PAGE_SIZE << order));
1090
1091        if (!inode_hashtable)
1092                panic("Failed to allocate inode hash table\n");
1093
1094        head = inode_hashtable;
1095        i = nr_hash;
1096        do {
1097                INIT_LIST_HEAD(head);
1098                head++;
1099                i--;
1100        } while (i);
1101
1102        /* inode slab cache */
1103        inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
1104                                         0, SLAB_HWCACHE_ALIGN, init_once,
1105                                         NULL);
1106        if (!inode_cachep)
1107                panic("cannot create inode slab cache");
1108}
1109
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.