linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/export.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41#include "mount.h"
  42
  43/*
  44 * Usage:
  45 * dcache->d_inode->i_lock protects:
  46 *   - i_dentry, d_alias, d_inode of aliases
  47 * dcache_hash_bucket lock protects:
  48 *   - the dcache hash table
  49 * s_anon bl list spinlock protects:
  50 *   - the s_anon list (see __d_drop)
  51 * dcache_lru_lock protects:
  52 *   - the dcache lru lists and counters
  53 * d_lock protects:
  54 *   - d_flags
  55 *   - d_name
  56 *   - d_lru
  57 *   - d_count
  58 *   - d_unhashed()
  59 *   - d_parent and d_subdirs
  60 *   - childrens' d_child and d_parent
  61 *   - d_alias, d_inode
  62 *
  63 * Ordering:
  64 * dentry->d_inode->i_lock
  65 *   dentry->d_lock
  66 *     dcache_lru_lock
  67 *     dcache_hash_bucket lock
  68 *     s_anon lock
  69 *
  70 * If there is an ancestor relationship:
  71 * dentry->d_parent->...->d_parent->d_lock
  72 *   ...
  73 *     dentry->d_parent->d_lock
  74 *       dentry->d_lock
  75 *
  76 * If no ancestor relationship:
  77 * if (dentry1 < dentry2)
  78 *   dentry1->d_lock
  79 *     dentry2->d_lock
  80 */
  81int sysctl_vfs_cache_pressure __read_mostly = 100;
  82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  83
  84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  86
  87EXPORT_SYMBOL(rename_lock);
  88
  89static struct kmem_cache *dentry_cache __read_mostly;
  90
  91/*
  92 * This is the single most critical data structure when it comes
  93 * to the dcache: the hashtable for lookups. Somebody should try
  94 * to make this good - I've just made it work.
  95 *
  96 * This hash-function tries to avoid losing too many bits of hash
  97 * information, yet avoid using a prime hash-size or similar.
  98 */
  99#define D_HASHBITS     d_hash_shift
 100#define D_HASHMASK     d_hash_mask
 101
 102static unsigned int d_hash_mask __read_mostly;
 103static unsigned int d_hash_shift __read_mostly;
 104
 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
 106
 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 108                                        unsigned int hash)
 109{
 110        hash += (unsigned long) parent / L1_CACHE_BYTES;
 111        hash = hash + (hash >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
 115/* Statistics gathering. */
 116struct dentry_stat_t dentry_stat = {
 117        .age_limit = 45,
 118};
 119
 120static DEFINE_PER_CPU(unsigned int, nr_dentry);
 121
 122#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 123static int get_nr_dentry(void)
 124{
 125        int i;
 126        int sum = 0;
 127        for_each_possible_cpu(i)
 128                sum += per_cpu(nr_dentry, i);
 129        return sum < 0 ? 0 : sum;
 130}
 131
 132int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 133                   size_t *lenp, loff_t *ppos)
 134{
 135        dentry_stat.nr_dentry = get_nr_dentry();
 136        return proc_dointvec(table, write, buffer, lenp, ppos);
 137}
 138#endif
 139
 140/*
 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 142 * The strings are both count bytes long, and count is non-zero.
 143 */
 144#ifdef CONFIG_DCACHE_WORD_ACCESS
 145
 146#include <asm/word-at-a-time.h>
 147/*
 148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 149 * aligned allocation for this particular component. We don't
 150 * strictly need the load_unaligned_zeropad() safety, but it
 151 * doesn't hurt either.
 152 *
 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 154 * need the careful unaligned handling.
 155 */
 156static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 157{
 158        unsigned long a,b,mask;
 159
 160        for (;;) {
 161                a = *(unsigned long *)cs;
 162                b = load_unaligned_zeropad(ct);
 163                if (tcount < sizeof(unsigned long))
 164                        break;
 165                if (unlikely(a != b))
 166                        return 1;
 167                cs += sizeof(unsigned long);
 168                ct += sizeof(unsigned long);
 169                tcount -= sizeof(unsigned long);
 170                if (!tcount)
 171                        return 0;
 172        }
 173        mask = ~(~0ul << tcount*8);
 174        return unlikely(!!((a ^ b) & mask));
 175}
 176
 177#else
 178
 179static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 180{
 181        do {
 182                if (*cs != *ct)
 183                        return 1;
 184                cs++;
 185                ct++;
 186                tcount--;
 187        } while (tcount);
 188        return 0;
 189}
 190
 191#endif
 192
 193static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 194{
 195        const unsigned char *cs;
 196        /*
 197         * Be careful about RCU walk racing with rename:
 198         * use ACCESS_ONCE to fetch the name pointer.
 199         *
 200         * NOTE! Even if a rename will mean that the length
 201         * was not loaded atomically, we don't care. The
 202         * RCU walk will check the sequence count eventually,
 203         * and catch it. And we won't overrun the buffer,
 204         * because we're reading the name pointer atomically,
 205         * and a dentry name is guaranteed to be properly
 206         * terminated with a NUL byte.
 207         *
 208         * End result: even if 'len' is wrong, we'll exit
 209         * early because the data cannot match (there can
 210         * be no NUL in the ct/tcount data)
 211         */
 212        cs = ACCESS_ONCE(dentry->d_name.name);
 213        smp_read_barrier_depends();
 214        return dentry_string_cmp(cs, ct, tcount);
 215}
 216
 217static void __d_free(struct rcu_head *head)
 218{
 219        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 220
 221        WARN_ON(!hlist_unhashed(&dentry->d_alias));
 222        if (dname_external(dentry))
 223                kfree(dentry->d_name.name);
 224        kmem_cache_free(dentry_cache, dentry); 
 225}
 226
 227/*
 228 * no locks, please.
 229 */
 230static void d_free(struct dentry *dentry)
 231{
 232        BUG_ON(dentry->d_count);
 233        this_cpu_dec(nr_dentry);
 234        if (dentry->d_op && dentry->d_op->d_release)
 235                dentry->d_op->d_release(dentry);
 236
 237        /* if dentry was never visible to RCU, immediate free is OK */
 238        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 239                __d_free(&dentry->d_u.d_rcu);
 240        else
 241                call_rcu(&dentry->d_u.d_rcu, __d_free);
 242}
 243
 244/**
 245 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 246 * @dentry: the target dentry
 247 * After this call, in-progress rcu-walk path lookup will fail. This
 248 * should be called after unhashing, and after changing d_inode (if
 249 * the dentry has not already been unhashed).
 250 */
 251static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 252{
 253        assert_spin_locked(&dentry->d_lock);
 254        /* Go through a barrier */
 255        write_seqcount_barrier(&dentry->d_seq);
 256}
 257
 258/*
 259 * Release the dentry's inode, using the filesystem
 260 * d_iput() operation if defined. Dentry has no refcount
 261 * and is unhashed.
 262 */
 263static void dentry_iput(struct dentry * dentry)
 264        __releases(dentry->d_lock)
 265        __releases(dentry->d_inode->i_lock)
 266{
 267        struct inode *inode = dentry->d_inode;
 268        if (inode) {
 269                dentry->d_inode = NULL;
 270                hlist_del_init(&dentry->d_alias);
 271                spin_unlock(&dentry->d_lock);
 272                spin_unlock(&inode->i_lock);
 273                if (!inode->i_nlink)
 274                        fsnotify_inoderemove(inode);
 275                if (dentry->d_op && dentry->d_op->d_iput)
 276                        dentry->d_op->d_iput(dentry, inode);
 277                else
 278                        iput(inode);
 279        } else {
 280                spin_unlock(&dentry->d_lock);
 281        }
 282}
 283
 284/*
 285 * Release the dentry's inode, using the filesystem
 286 * d_iput() operation if defined. dentry remains in-use.
 287 */
 288static void dentry_unlink_inode(struct dentry * dentry)
 289        __releases(dentry->d_lock)
 290        __releases(dentry->d_inode->i_lock)
 291{
 292        struct inode *inode = dentry->d_inode;
 293        dentry->d_inode = NULL;
 294        hlist_del_init(&dentry->d_alias);
 295        dentry_rcuwalk_barrier(dentry);
 296        spin_unlock(&dentry->d_lock);
 297        spin_unlock(&inode->i_lock);
 298        if (!inode->i_nlink)
 299                fsnotify_inoderemove(inode);
 300        if (dentry->d_op && dentry->d_op->d_iput)
 301                dentry->d_op->d_iput(dentry, inode);
 302        else
 303                iput(inode);
 304}
 305
 306/*
 307 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 308 */
 309static void dentry_lru_add(struct dentry *dentry)
 310{
 311        if (list_empty(&dentry->d_lru)) {
 312                spin_lock(&dcache_lru_lock);
 313                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 314                dentry->d_sb->s_nr_dentry_unused++;
 315                dentry_stat.nr_unused++;
 316                spin_unlock(&dcache_lru_lock);
 317        }
 318}
 319
 320static void __dentry_lru_del(struct dentry *dentry)
 321{
 322        list_del_init(&dentry->d_lru);
 323        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 324        dentry->d_sb->s_nr_dentry_unused--;
 325        dentry_stat.nr_unused--;
 326}
 327
 328/*
 329 * Remove a dentry with references from the LRU.
 330 */
 331static void dentry_lru_del(struct dentry *dentry)
 332{
 333        if (!list_empty(&dentry->d_lru)) {
 334                spin_lock(&dcache_lru_lock);
 335                __dentry_lru_del(dentry);
 336                spin_unlock(&dcache_lru_lock);
 337        }
 338}
 339
 340/*
 341 * Remove a dentry that is unreferenced and about to be pruned
 342 * (unhashed and destroyed) from the LRU, and inform the file system.
 343 * This wrapper should be called _prior_ to unhashing a victim dentry.
 344 */
 345static void dentry_lru_prune(struct dentry *dentry)
 346{
 347        if (!list_empty(&dentry->d_lru)) {
 348                if (dentry->d_flags & DCACHE_OP_PRUNE)
 349                        dentry->d_op->d_prune(dentry);
 350
 351                spin_lock(&dcache_lru_lock);
 352                __dentry_lru_del(dentry);
 353                spin_unlock(&dcache_lru_lock);
 354        }
 355}
 356
 357static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 358{
 359        spin_lock(&dcache_lru_lock);
 360        if (list_empty(&dentry->d_lru)) {
 361                list_add_tail(&dentry->d_lru, list);
 362                dentry->d_sb->s_nr_dentry_unused++;
 363                dentry_stat.nr_unused++;
 364        } else {
 365                list_move_tail(&dentry->d_lru, list);
 366        }
 367        spin_unlock(&dcache_lru_lock);
 368}
 369
 370/**
 371 * d_kill - kill dentry and return parent
 372 * @dentry: dentry to kill
 373 * @parent: parent dentry
 374 *
 375 * The dentry must already be unhashed and removed from the LRU.
 376 *
 377 * If this is the root of the dentry tree, return NULL.
 378 *
 379 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 380 * d_kill.
 381 */
 382static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 383        __releases(dentry->d_lock)
 384        __releases(parent->d_lock)
 385        __releases(dentry->d_inode->i_lock)
 386{
 387        list_del(&dentry->d_u.d_child);
 388        /*
 389         * Inform try_to_ascend() that we are no longer attached to the
 390         * dentry tree
 391         */
 392        dentry->d_flags |= DCACHE_DENTRY_KILLED;
 393        if (parent)
 394                spin_unlock(&parent->d_lock);
 395        dentry_iput(dentry);
 396        /*
 397         * dentry_iput drops the locks, at which point nobody (except
 398         * transient RCU lookups) can reach this dentry.
 399         */
 400        d_free(dentry);
 401        return parent;
 402}
 403
 404/*
 405 * Unhash a dentry without inserting an RCU walk barrier or checking that
 406 * dentry->d_lock is locked.  The caller must take care of that, if
 407 * appropriate.
 408 */
 409static void __d_shrink(struct dentry *dentry)
 410{
 411        if (!d_unhashed(dentry)) {
 412                struct hlist_bl_head *b;
 413                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 414                        b = &dentry->d_sb->s_anon;
 415                else
 416                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 417
 418                hlist_bl_lock(b);
 419                __hlist_bl_del(&dentry->d_hash);
 420                dentry->d_hash.pprev = NULL;
 421                hlist_bl_unlock(b);
 422        }
 423}
 424
 425/**
 426 * d_drop - drop a dentry
 427 * @dentry: dentry to drop
 428 *
 429 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 430 * be found through a VFS lookup any more. Note that this is different from
 431 * deleting the dentry - d_delete will try to mark the dentry negative if
 432 * possible, giving a successful _negative_ lookup, while d_drop will
 433 * just make the cache lookup fail.
 434 *
 435 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 436 * reason (NFS timeouts or autofs deletes).
 437 *
 438 * __d_drop requires dentry->d_lock.
 439 */
 440void __d_drop(struct dentry *dentry)
 441{
 442        if (!d_unhashed(dentry)) {
 443                __d_shrink(dentry);
 444                dentry_rcuwalk_barrier(dentry);
 445        }
 446}
 447EXPORT_SYMBOL(__d_drop);
 448
 449void d_drop(struct dentry *dentry)
 450{
 451        spin_lock(&dentry->d_lock);
 452        __d_drop(dentry);
 453        spin_unlock(&dentry->d_lock);
 454}
 455EXPORT_SYMBOL(d_drop);
 456
 457/*
 458 * Finish off a dentry we've decided to kill.
 459 * dentry->d_lock must be held, returns with it unlocked.
 460 * If ref is non-zero, then decrement the refcount too.
 461 * Returns dentry requiring refcount drop, or NULL if we're done.
 462 */
 463static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 464        __releases(dentry->d_lock)
 465{
 466        struct inode *inode;
 467        struct dentry *parent;
 468
 469        inode = dentry->d_inode;
 470        if (inode && !spin_trylock(&inode->i_lock)) {
 471relock:
 472                spin_unlock(&dentry->d_lock);
 473                cpu_relax();
 474                return dentry; /* try again with same dentry */
 475        }
 476        if (IS_ROOT(dentry))
 477                parent = NULL;
 478        else
 479                parent = dentry->d_parent;
 480        if (parent && !spin_trylock(&parent->d_lock)) {
 481                if (inode)
 482                        spin_unlock(&inode->i_lock);
 483                goto relock;
 484        }
 485
 486        if (ref)
 487                dentry->d_count--;
 488        /*
 489         * if dentry was on the d_lru list delete it from there.
 490         * inform the fs via d_prune that this dentry is about to be
 491         * unhashed and destroyed.
 492         */
 493        dentry_lru_prune(dentry);
 494        /* if it was on the hash then remove it */
 495        __d_drop(dentry);
 496        return d_kill(dentry, parent);
 497}
 498
 499/* 
 500 * This is dput
 501 *
 502 * This is complicated by the fact that we do not want to put
 503 * dentries that are no longer on any hash chain on the unused
 504 * list: we'd much rather just get rid of them immediately.
 505 *
 506 * However, that implies that we have to traverse the dentry
 507 * tree upwards to the parents which might _also_ now be
 508 * scheduled for deletion (it may have been only waiting for
 509 * its last child to go away).
 510 *
 511 * This tail recursion is done by hand as we don't want to depend
 512 * on the compiler to always get this right (gcc generally doesn't).
 513 * Real recursion would eat up our stack space.
 514 */
 515
 516/*
 517 * dput - release a dentry
 518 * @dentry: dentry to release 
 519 *
 520 * Release a dentry. This will drop the usage count and if appropriate
 521 * call the dentry unlink method as well as removing it from the queues and
 522 * releasing its resources. If the parent dentries were scheduled for release
 523 * they too may now get deleted.
 524 */
 525void dput(struct dentry *dentry)
 526{
 527        if (!dentry)
 528                return;
 529
 530repeat:
 531        if (dentry->d_count == 1)
 532                might_sleep();
 533        spin_lock(&dentry->d_lock);
 534        BUG_ON(!dentry->d_count);
 535        if (dentry->d_count > 1) {
 536                dentry->d_count--;
 537                spin_unlock(&dentry->d_lock);
 538                return;
 539        }
 540
 541        if (dentry->d_flags & DCACHE_OP_DELETE) {
 542                if (dentry->d_op->d_delete(dentry))
 543                        goto kill_it;
 544        }
 545
 546        /* Unreachable? Get rid of it */
 547        if (d_unhashed(dentry))
 548                goto kill_it;
 549
 550        dentry->d_flags |= DCACHE_REFERENCED;
 551        dentry_lru_add(dentry);
 552
 553        dentry->d_count--;
 554        spin_unlock(&dentry->d_lock);
 555        return;
 556
 557kill_it:
 558        dentry = dentry_kill(dentry, 1);
 559        if (dentry)
 560                goto repeat;
 561}
 562EXPORT_SYMBOL(dput);
 563
 564/**
 565 * d_invalidate - invalidate a dentry
 566 * @dentry: dentry to invalidate
 567 *
 568 * Try to invalidate the dentry if it turns out to be
 569 * possible. If there are other dentries that can be
 570 * reached through this one we can't delete it and we
 571 * return -EBUSY. On success we return 0.
 572 *
 573 * no dcache lock.
 574 */
 575 
 576int d_invalidate(struct dentry * dentry)
 577{
 578        /*
 579         * If it's already been dropped, return OK.
 580         */
 581        spin_lock(&dentry->d_lock);
 582        if (d_unhashed(dentry)) {
 583                spin_unlock(&dentry->d_lock);
 584                return 0;
 585        }
 586        /*
 587         * Check whether to do a partial shrink_dcache
 588         * to get rid of unused child entries.
 589         */
 590        if (!list_empty(&dentry->d_subdirs)) {
 591                spin_unlock(&dentry->d_lock);
 592                shrink_dcache_parent(dentry);
 593                spin_lock(&dentry->d_lock);
 594        }
 595
 596        /*
 597         * Somebody else still using it?
 598         *
 599         * If it's a directory, we can't drop it
 600         * for fear of somebody re-populating it
 601         * with children (even though dropping it
 602         * would make it unreachable from the root,
 603         * we might still populate it if it was a
 604         * working directory or similar).
 605         * We also need to leave mountpoints alone,
 606         * directory or not.
 607         */
 608        if (dentry->d_count > 1 && dentry->d_inode) {
 609                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 610                        spin_unlock(&dentry->d_lock);
 611                        return -EBUSY;
 612                }
 613        }
 614
 615        __d_drop(dentry);
 616        spin_unlock(&dentry->d_lock);
 617        return 0;
 618}
 619EXPORT_SYMBOL(d_invalidate);
 620
 621/* This must be called with d_lock held */
 622static inline void __dget_dlock(struct dentry *dentry)
 623{
 624        dentry->d_count++;
 625}
 626
 627static inline void __dget(struct dentry *dentry)
 628{
 629        spin_lock(&dentry->d_lock);
 630        __dget_dlock(dentry);
 631        spin_unlock(&dentry->d_lock);
 632}
 633
 634struct dentry *dget_parent(struct dentry *dentry)
 635{
 636        struct dentry *ret;
 637
 638repeat:
 639        /*
 640         * Don't need rcu_dereference because we re-check it was correct under
 641         * the lock.
 642         */
 643        rcu_read_lock();
 644        ret = dentry->d_parent;
 645        spin_lock(&ret->d_lock);
 646        if (unlikely(ret != dentry->d_parent)) {
 647                spin_unlock(&ret->d_lock);
 648                rcu_read_unlock();
 649                goto repeat;
 650        }
 651        rcu_read_unlock();
 652        BUG_ON(!ret->d_count);
 653        ret->d_count++;
 654        spin_unlock(&ret->d_lock);
 655        return ret;
 656}
 657EXPORT_SYMBOL(dget_parent);
 658
 659/**
 660 * d_find_alias - grab a hashed alias of inode
 661 * @inode: inode in question
 662 * @want_discon:  flag, used by d_splice_alias, to request
 663 *          that only a DISCONNECTED alias be returned.
 664 *
 665 * If inode has a hashed alias, or is a directory and has any alias,
 666 * acquire the reference to alias and return it. Otherwise return NULL.
 667 * Notice that if inode is a directory there can be only one alias and
 668 * it can be unhashed only if it has no children, or if it is the root
 669 * of a filesystem.
 670 *
 671 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 672 * any other hashed alias over that one unless @want_discon is set,
 673 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 674 */
 675static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 676{
 677        struct dentry *alias, *discon_alias;
 678        struct hlist_node *p;
 679
 680again:
 681        discon_alias = NULL;
 682        hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
 683                spin_lock(&alias->d_lock);
 684                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 685                        if (IS_ROOT(alias) &&
 686                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 687                                discon_alias = alias;
 688                        } else if (!want_discon) {
 689                                __dget_dlock(alias);
 690                                spin_unlock(&alias->d_lock);
 691                                return alias;
 692                        }
 693                }
 694                spin_unlock(&alias->d_lock);
 695        }
 696        if (discon_alias) {
 697                alias = discon_alias;
 698                spin_lock(&alias->d_lock);
 699                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 700                        if (IS_ROOT(alias) &&
 701                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 702                                __dget_dlock(alias);
 703                                spin_unlock(&alias->d_lock);
 704                                return alias;
 705                        }
 706                }
 707                spin_unlock(&alias->d_lock);
 708                goto again;
 709        }
 710        return NULL;
 711}
 712
 713struct dentry *d_find_alias(struct inode *inode)
 714{
 715        struct dentry *de = NULL;
 716
 717        if (!hlist_empty(&inode->i_dentry)) {
 718                spin_lock(&inode->i_lock);
 719                de = __d_find_alias(inode, 0);
 720                spin_unlock(&inode->i_lock);
 721        }
 722        return de;
 723}
 724EXPORT_SYMBOL(d_find_alias);
 725
 726/*
 727 *      Try to kill dentries associated with this inode.
 728 * WARNING: you must own a reference to inode.
 729 */
 730void d_prune_aliases(struct inode *inode)
 731{
 732        struct dentry *dentry;
 733        struct hlist_node *p;
 734restart:
 735        spin_lock(&inode->i_lock);
 736        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 737                spin_lock(&dentry->d_lock);
 738                if (!dentry->d_count) {
 739                        __dget_dlock(dentry);
 740                        __d_drop(dentry);
 741                        spin_unlock(&dentry->d_lock);
 742                        spin_unlock(&inode->i_lock);
 743                        dput(dentry);
 744                        goto restart;
 745                }
 746                spin_unlock(&dentry->d_lock);
 747        }
 748        spin_unlock(&inode->i_lock);
 749}
 750EXPORT_SYMBOL(d_prune_aliases);
 751
 752/*
 753 * Try to throw away a dentry - free the inode, dput the parent.
 754 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 755 * Releases dentry->d_lock.
 756 *
 757 * This may fail if locks cannot be acquired no problem, just try again.
 758 */
 759static void try_prune_one_dentry(struct dentry *dentry)
 760        __releases(dentry->d_lock)
 761{
 762        struct dentry *parent;
 763
 764        parent = dentry_kill(dentry, 0);
 765        /*
 766         * If dentry_kill returns NULL, we have nothing more to do.
 767         * if it returns the same dentry, trylocks failed. In either
 768         * case, just loop again.
 769         *
 770         * Otherwise, we need to prune ancestors too. This is necessary
 771         * to prevent quadratic behavior of shrink_dcache_parent(), but
 772         * is also expected to be beneficial in reducing dentry cache
 773         * fragmentation.
 774         */
 775        if (!parent)
 776                return;
 777        if (parent == dentry)
 778                return;
 779
 780        /* Prune ancestors. */
 781        dentry = parent;
 782        while (dentry) {
 783                spin_lock(&dentry->d_lock);
 784                if (dentry->d_count > 1) {
 785                        dentry->d_count--;
 786                        spin_unlock(&dentry->d_lock);
 787                        return;
 788                }
 789                dentry = dentry_kill(dentry, 1);
 790        }
 791}
 792
 793static void shrink_dentry_list(struct list_head *list)
 794{
 795        struct dentry *dentry;
 796
 797        rcu_read_lock();
 798        for (;;) {
 799                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 800                if (&dentry->d_lru == list)
 801                        break; /* empty */
 802                spin_lock(&dentry->d_lock);
 803                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 804                        spin_unlock(&dentry->d_lock);
 805                        continue;
 806                }
 807
 808                /*
 809                 * We found an inuse dentry which was not removed from
 810                 * the LRU because of laziness during lookup.  Do not free
 811                 * it - just keep it off the LRU list.
 812                 */
 813                if (dentry->d_count) {
 814                        dentry_lru_del(dentry);
 815                        spin_unlock(&dentry->d_lock);
 816                        continue;
 817                }
 818
 819                rcu_read_unlock();
 820
 821                try_prune_one_dentry(dentry);
 822
 823                rcu_read_lock();
 824        }
 825        rcu_read_unlock();
 826}
 827
 828/**
 829 * prune_dcache_sb - shrink the dcache
 830 * @sb: superblock
 831 * @count: number of entries to try to free
 832 *
 833 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 834 * done when we need more memory an called from the superblock shrinker
 835 * function.
 836 *
 837 * This function may fail to free any resources if all the dentries are in
 838 * use.
 839 */
 840void prune_dcache_sb(struct super_block *sb, int count)
 841{
 842        struct dentry *dentry;
 843        LIST_HEAD(referenced);
 844        LIST_HEAD(tmp);
 845
 846relock:
 847        spin_lock(&dcache_lru_lock);
 848        while (!list_empty(&sb->s_dentry_lru)) {
 849                dentry = list_entry(sb->s_dentry_lru.prev,
 850                                struct dentry, d_lru);
 851                BUG_ON(dentry->d_sb != sb);
 852
 853                if (!spin_trylock(&dentry->d_lock)) {
 854                        spin_unlock(&dcache_lru_lock);
 855                        cpu_relax();
 856                        goto relock;
 857                }
 858
 859                if (dentry->d_flags & DCACHE_REFERENCED) {
 860                        dentry->d_flags &= ~DCACHE_REFERENCED;
 861                        list_move(&dentry->d_lru, &referenced);
 862                        spin_unlock(&dentry->d_lock);
 863                } else {
 864                        list_move_tail(&dentry->d_lru, &tmp);
 865                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 866                        spin_unlock(&dentry->d_lock);
 867                        if (!--count)
 868                                break;
 869                }
 870                cond_resched_lock(&dcache_lru_lock);
 871        }
 872        if (!list_empty(&referenced))
 873                list_splice(&referenced, &sb->s_dentry_lru);
 874        spin_unlock(&dcache_lru_lock);
 875
 876        shrink_dentry_list(&tmp);
 877}
 878
 879/**
 880 * shrink_dcache_sb - shrink dcache for a superblock
 881 * @sb: superblock
 882 *
 883 * Shrink the dcache for the specified super block. This is used to free
 884 * the dcache before unmounting a file system.
 885 */
 886void shrink_dcache_sb(struct super_block *sb)
 887{
 888        LIST_HEAD(tmp);
 889
 890        spin_lock(&dcache_lru_lock);
 891        while (!list_empty(&sb->s_dentry_lru)) {
 892                list_splice_init(&sb->s_dentry_lru, &tmp);
 893                spin_unlock(&dcache_lru_lock);
 894                shrink_dentry_list(&tmp);
 895                spin_lock(&dcache_lru_lock);
 896        }
 897        spin_unlock(&dcache_lru_lock);
 898}
 899EXPORT_SYMBOL(shrink_dcache_sb);
 900
 901/*
 902 * destroy a single subtree of dentries for unmount
 903 * - see the comments on shrink_dcache_for_umount() for a description of the
 904 *   locking
 905 */
 906static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 907{
 908        struct dentry *parent;
 909
 910        BUG_ON(!IS_ROOT(dentry));
 911
 912        for (;;) {
 913                /* descend to the first leaf in the current subtree */
 914                while (!list_empty(&dentry->d_subdirs))
 915                        dentry = list_entry(dentry->d_subdirs.next,
 916                                            struct dentry, d_u.d_child);
 917
 918                /* consume the dentries from this leaf up through its parents
 919                 * until we find one with children or run out altogether */
 920                do {
 921                        struct inode *inode;
 922
 923                        /*
 924                         * remove the dentry from the lru, and inform
 925                         * the fs that this dentry is about to be
 926                         * unhashed and destroyed.
 927                         */
 928                        dentry_lru_prune(dentry);
 929                        __d_shrink(dentry);
 930
 931                        if (dentry->d_count != 0) {
 932                                printk(KERN_ERR
 933                                       "BUG: Dentry %p{i=%lx,n=%s}"
 934                                       " still in use (%d)"
 935                                       " [unmount of %s %s]\n",
 936                                       dentry,
 937                                       dentry->d_inode ?
 938                                       dentry->d_inode->i_ino : 0UL,
 939                                       dentry->d_name.name,
 940                                       dentry->d_count,
 941                                       dentry->d_sb->s_type->name,
 942                                       dentry->d_sb->s_id);
 943                                BUG();
 944                        }
 945
 946                        if (IS_ROOT(dentry)) {
 947                                parent = NULL;
 948                                list_del(&dentry->d_u.d_child);
 949                        } else {
 950                                parent = dentry->d_parent;
 951                                parent->d_count--;
 952                                list_del(&dentry->d_u.d_child);
 953                        }
 954
 955                        inode = dentry->d_inode;
 956                        if (inode) {
 957                                dentry->d_inode = NULL;
 958                                hlist_del_init(&dentry->d_alias);
 959                                if (dentry->d_op && dentry->d_op->d_iput)
 960                                        dentry->d_op->d_iput(dentry, inode);
 961                                else
 962                                        iput(inode);
 963                        }
 964
 965                        d_free(dentry);
 966
 967                        /* finished when we fall off the top of the tree,
 968                         * otherwise we ascend to the parent and move to the
 969                         * next sibling if there is one */
 970                        if (!parent)
 971                                return;
 972                        dentry = parent;
 973                } while (list_empty(&dentry->d_subdirs));
 974
 975                dentry = list_entry(dentry->d_subdirs.next,
 976                                    struct dentry, d_u.d_child);
 977        }
 978}
 979
 980/*
 981 * destroy the dentries attached to a superblock on unmounting
 982 * - we don't need to use dentry->d_lock because:
 983 *   - the superblock is detached from all mountings and open files, so the
 984 *     dentry trees will not be rearranged by the VFS
 985 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 986 *     any dentries belonging to this superblock that it comes across
 987 *   - the filesystem itself is no longer permitted to rearrange the dentries
 988 *     in this superblock
 989 */
 990void shrink_dcache_for_umount(struct super_block *sb)
 991{
 992        struct dentry *dentry;
 993
 994        if (down_read_trylock(&sb->s_umount))
 995                BUG();
 996
 997        dentry = sb->s_root;
 998        sb->s_root = NULL;
 999        dentry->d_count--;
1000        shrink_dcache_for_umount_subtree(dentry);
1001
1002        while (!hlist_bl_empty(&sb->s_anon)) {
1003                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1004                shrink_dcache_for_umount_subtree(dentry);
1005        }
1006}
1007
1008/*
1009 * This tries to ascend one level of parenthood, but
1010 * we can race with renaming, so we need to re-check
1011 * the parenthood after dropping the lock and check
1012 * that the sequence number still matches.
1013 */
1014static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1015{
1016        struct dentry *new = old->d_parent;
1017
1018        rcu_read_lock();
1019        spin_unlock(&old->d_lock);
1020        spin_lock(&new->d_lock);
1021
1022        /*
1023         * might go back up the wrong parent if we have had a rename
1024         * or deletion
1025         */
1026        if (new != old->d_parent ||
1027                 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1028                 (!locked && read_seqretry(&rename_lock, seq))) {
1029                spin_unlock(&new->d_lock);
1030                new = NULL;
1031        }
1032        rcu_read_unlock();
1033        return new;
1034}
1035
1036
1037/*
1038 * Search for at least 1 mount point in the dentry's subdirs.
1039 * We descend to the next level whenever the d_subdirs
1040 * list is non-empty and continue searching.
1041 */
1042 
1043/**
1044 * have_submounts - check for mounts over a dentry
1045 * @parent: dentry to check.
1046 *
1047 * Return true if the parent or its subdirectories contain
1048 * a mount point
1049 */
1050int have_submounts(struct dentry *parent)
1051{
1052        struct dentry *this_parent;
1053        struct list_head *next;
1054        unsigned seq;
1055        int locked = 0;
1056
1057        seq = read_seqbegin(&rename_lock);
1058again:
1059        this_parent = parent;
1060
1061        if (d_mountpoint(parent))
1062                goto positive;
1063        spin_lock(&this_parent->d_lock);
1064repeat:
1065        next = this_parent->d_subdirs.next;
1066resume:
1067        while (next != &this_parent->d_subdirs) {
1068                struct list_head *tmp = next;
1069                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1070                next = tmp->next;
1071
1072                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1073                /* Have we found a mount point ? */
1074                if (d_mountpoint(dentry)) {
1075                        spin_unlock(&dentry->d_lock);
1076                        spin_unlock(&this_parent->d_lock);
1077                        goto positive;
1078                }
1079                if (!list_empty(&dentry->d_subdirs)) {
1080                        spin_unlock(&this_parent->d_lock);
1081                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1082                        this_parent = dentry;
1083                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1084                        goto repeat;
1085                }
1086                spin_unlock(&dentry->d_lock);
1087        }
1088        /*
1089         * All done at this level ... ascend and resume the search.
1090         */
1091        if (this_parent != parent) {
1092                struct dentry *child = this_parent;
1093                this_parent = try_to_ascend(this_parent, locked, seq);
1094                if (!this_parent)
1095                        goto rename_retry;
1096                next = child->d_u.d_child.next;
1097                goto resume;
1098        }
1099        spin_unlock(&this_parent->d_lock);
1100        if (!locked && read_seqretry(&rename_lock, seq))
1101                goto rename_retry;
1102        if (locked)
1103                write_sequnlock(&rename_lock);
1104        return 0; /* No mount points found in tree */
1105positive:
1106        if (!locked && read_seqretry(&rename_lock, seq))
1107                goto rename_retry;
1108        if (locked)
1109                write_sequnlock(&rename_lock);
1110        return 1;
1111
1112rename_retry:
1113        if (locked)
1114                goto again;
1115        locked = 1;
1116        write_seqlock(&rename_lock);
1117        goto again;
1118}
1119EXPORT_SYMBOL(have_submounts);
1120
1121/*
1122 * Search the dentry child list of the specified parent,
1123 * and move any unused dentries to the end of the unused
1124 * list for prune_dcache(). We descend to the next level
1125 * whenever the d_subdirs list is non-empty and continue
1126 * searching.
1127 *
1128 * It returns zero iff there are no unused children,
1129 * otherwise  it returns the number of children moved to
1130 * the end of the unused list. This may not be the total
1131 * number of unused children, because select_parent can
1132 * drop the lock and return early due to latency
1133 * constraints.
1134 */
1135static int select_parent(struct dentry *parent, struct list_head *dispose)
1136{
1137        struct dentry *this_parent;
1138        struct list_head *next;
1139        unsigned seq;
1140        int found = 0;
1141        int locked = 0;
1142
1143        seq = read_seqbegin(&rename_lock);
1144again:
1145        this_parent = parent;
1146        spin_lock(&this_parent->d_lock);
1147repeat:
1148        next = this_parent->d_subdirs.next;
1149resume:
1150        while (next != &this_parent->d_subdirs) {
1151                struct list_head *tmp = next;
1152                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1153                next = tmp->next;
1154
1155                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1156
1157                /*
1158                 * move only zero ref count dentries to the dispose list.
1159                 *
1160                 * Those which are presently on the shrink list, being processed
1161                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1162                 * loop in shrink_dcache_parent() might not make any progress
1163                 * and loop forever.
1164                 */
1165                if (dentry->d_count) {
1166                        dentry_lru_del(dentry);
1167                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1168                        dentry_lru_move_list(dentry, dispose);
1169                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1170                        found++;
1171                }
1172                /*
1173                 * We can return to the caller if we have found some (this
1174                 * ensures forward progress). We'll be coming back to find
1175                 * the rest.
1176                 */
1177                if (found && need_resched()) {
1178                        spin_unlock(&dentry->d_lock);
1179                        goto out;
1180                }
1181
1182                /*
1183                 * Descend a level if the d_subdirs list is non-empty.
1184                 */
1185                if (!list_empty(&dentry->d_subdirs)) {
1186                        spin_unlock(&this_parent->d_lock);
1187                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1188                        this_parent = dentry;
1189                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1190                        goto repeat;
1191                }
1192
1193                spin_unlock(&dentry->d_lock);
1194        }
1195        /*
1196         * All done at this level ... ascend and resume the search.
1197         */
1198        if (this_parent != parent) {
1199                struct dentry *child = this_parent;
1200                this_parent = try_to_ascend(this_parent, locked, seq);
1201                if (!this_parent)
1202                        goto rename_retry;
1203                next = child->d_u.d_child.next;
1204                goto resume;
1205        }
1206out:
1207        spin_unlock(&this_parent->d_lock);
1208        if (!locked && read_seqretry(&rename_lock, seq))
1209                goto rename_retry;
1210        if (locked)
1211                write_sequnlock(&rename_lock);
1212        return found;
1213
1214rename_retry:
1215        if (found)
1216                return found;
1217        if (locked)
1218                goto again;
1219        locked = 1;
1220        write_seqlock(&rename_lock);
1221        goto again;
1222}
1223
1224/**
1225 * shrink_dcache_parent - prune dcache
1226 * @parent: parent of entries to prune
1227 *
1228 * Prune the dcache to remove unused children of the parent dentry.
1229 */
1230void shrink_dcache_parent(struct dentry * parent)
1231{
1232        LIST_HEAD(dispose);
1233        int found;
1234
1235        while ((found = select_parent(parent, &dispose)) != 0)
1236                shrink_dentry_list(&dispose);
1237}
1238EXPORT_SYMBOL(shrink_dcache_parent);
1239
1240/**
1241 * __d_alloc    -       allocate a dcache entry
1242 * @sb: filesystem it will belong to
1243 * @name: qstr of the name
1244 *
1245 * Allocates a dentry. It returns %NULL if there is insufficient memory
1246 * available. On a success the dentry is returned. The name passed in is
1247 * copied and the copy passed in may be reused after this call.
1248 */
1249 
1250struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1251{
1252        struct dentry *dentry;
1253        char *dname;
1254
1255        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1256        if (!dentry)
1257                return NULL;
1258
1259        /*
1260         * We guarantee that the inline name is always NUL-terminated.
1261         * This way the memcpy() done by the name switching in rename
1262         * will still always have a NUL at the end, even if we might
1263         * be overwriting an internal NUL character
1264         */
1265        dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1266        if (name->len > DNAME_INLINE_LEN-1) {
1267                dname = kmalloc(name->len + 1, GFP_KERNEL);
1268                if (!dname) {
1269                        kmem_cache_free(dentry_cache, dentry); 
1270                        return NULL;
1271                }
1272        } else  {
1273                dname = dentry->d_iname;
1274        }       
1275
1276        dentry->d_name.len = name->len;
1277        dentry->d_name.hash = name->hash;
1278        memcpy(dname, name->name, name->len);
1279        dname[name->len] = 0;
1280
1281        /* Make sure we always see the terminating NUL character */
1282        smp_wmb();
1283        dentry->d_name.name = dname;
1284
1285        dentry->d_count = 1;
1286        dentry->d_flags = 0;
1287        spin_lock_init(&dentry->d_lock);
1288        seqcount_init(&dentry->d_seq);
1289        dentry->d_inode = NULL;
1290        dentry->d_parent = dentry;
1291        dentry->d_sb = sb;
1292        dentry->d_op = NULL;
1293        dentry->d_fsdata = NULL;
1294        INIT_HLIST_BL_NODE(&dentry->d_hash);
1295        INIT_LIST_HEAD(&dentry->d_lru);
1296        INIT_LIST_HEAD(&dentry->d_subdirs);
1297        INIT_HLIST_NODE(&dentry->d_alias);
1298        INIT_LIST_HEAD(&dentry->d_u.d_child);
1299        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1300
1301        this_cpu_inc(nr_dentry);
1302
1303        return dentry;
1304}
1305
1306/**
1307 * d_alloc      -       allocate a dcache entry
1308 * @parent: parent of entry to allocate
1309 * @name: qstr of the name
1310 *
1311 * Allocates a dentry. It returns %NULL if there is insufficient memory
1312 * available. On a success the dentry is returned. The name passed in is
1313 * copied and the copy passed in may be reused after this call.
1314 */
1315struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1316{
1317        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1318        if (!dentry)
1319                return NULL;
1320
1321        spin_lock(&parent->d_lock);
1322        /*
1323         * don't need child lock because it is not subject
1324         * to concurrency here
1325         */
1326        __dget_dlock(parent);
1327        dentry->d_parent = parent;
1328        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1329        spin_unlock(&parent->d_lock);
1330
1331        return dentry;
1332}
1333EXPORT_SYMBOL(d_alloc);
1334
1335struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1336{
1337        struct dentry *dentry = __d_alloc(sb, name);
1338        if (dentry)
1339                dentry->d_flags |= DCACHE_DISCONNECTED;
1340        return dentry;
1341}
1342EXPORT_SYMBOL(d_alloc_pseudo);
1343
1344struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1345{
1346        struct qstr q;
1347
1348        q.name = name;
1349        q.len = strlen(name);
1350        q.hash = full_name_hash(q.name, q.len);
1351        return d_alloc(parent, &q);
1352}
1353EXPORT_SYMBOL(d_alloc_name);
1354
1355void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1356{
1357        WARN_ON_ONCE(dentry->d_op);
1358        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1359                                DCACHE_OP_COMPARE       |
1360                                DCACHE_OP_REVALIDATE    |
1361                                DCACHE_OP_DELETE ));
1362        dentry->d_op = op;
1363        if (!op)
1364                return;
1365        if (op->d_hash)
1366                dentry->d_flags |= DCACHE_OP_HASH;
1367        if (op->d_compare)
1368                dentry->d_flags |= DCACHE_OP_COMPARE;
1369        if (op->d_revalidate)
1370                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1371        if (op->d_delete)
1372                dentry->d_flags |= DCACHE_OP_DELETE;
1373        if (op->d_prune)
1374                dentry->d_flags |= DCACHE_OP_PRUNE;
1375
1376}
1377EXPORT_SYMBOL(d_set_d_op);
1378
1379static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1380{
1381        spin_lock(&dentry->d_lock);
1382        if (inode) {
1383                if (unlikely(IS_AUTOMOUNT(inode)))
1384                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1385                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1386        }
1387        dentry->d_inode = inode;
1388        dentry_rcuwalk_barrier(dentry);
1389        spin_unlock(&dentry->d_lock);
1390        fsnotify_d_instantiate(dentry, inode);
1391}
1392
1393/**
1394 * d_instantiate - fill in inode information for a dentry
1395 * @entry: dentry to complete
1396 * @inode: inode to attach to this dentry
1397 *
1398 * Fill in inode information in the entry.
1399 *
1400 * This turns negative dentries into productive full members
1401 * of society.
1402 *
1403 * NOTE! This assumes that the inode count has been incremented
1404 * (or otherwise set) by the caller to indicate that it is now
1405 * in use by the dcache.
1406 */
1407 
1408void d_instantiate(struct dentry *entry, struct inode * inode)
1409{
1410        BUG_ON(!hlist_unhashed(&entry->d_alias));
1411        if (inode)
1412                spin_lock(&inode->i_lock);
1413        __d_instantiate(entry, inode);
1414        if (inode)
1415                spin_unlock(&inode->i_lock);
1416        security_d_instantiate(entry, inode);
1417}
1418EXPORT_SYMBOL(d_instantiate);
1419
1420/**
1421 * d_instantiate_unique - instantiate a non-aliased dentry
1422 * @entry: dentry to instantiate
1423 * @inode: inode to attach to this dentry
1424 *
1425 * Fill in inode information in the entry. On success, it returns NULL.
1426 * If an unhashed alias of "entry" already exists, then we return the
1427 * aliased dentry instead and drop one reference to inode.
1428 *
1429 * Note that in order to avoid conflicts with rename() etc, the caller
1430 * had better be holding the parent directory semaphore.
1431 *
1432 * This also assumes that the inode count has been incremented
1433 * (or otherwise set) by the caller to indicate that it is now
1434 * in use by the dcache.
1435 */
1436static struct dentry *__d_instantiate_unique(struct dentry *entry,
1437                                             struct inode *inode)
1438{
1439        struct dentry *alias;
1440        int len = entry->d_name.len;
1441        const char *name = entry->d_name.name;
1442        unsigned int hash = entry->d_name.hash;
1443        struct hlist_node *p;
1444
1445        if (!inode) {
1446                __d_instantiate(entry, NULL);
1447                return NULL;
1448        }
1449
1450        hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
1451                /*
1452                 * Don't need alias->d_lock here, because aliases with
1453                 * d_parent == entry->d_parent are not subject to name or
1454                 * parent changes, because the parent inode i_mutex is held.
1455                 */
1456                if (alias->d_name.hash != hash)
1457                        continue;
1458                if (alias->d_parent != entry->d_parent)
1459                        continue;
1460                if (alias->d_name.len != len)
1461                        continue;
1462                if (dentry_cmp(alias, name, len))
1463                        continue;
1464                __dget(alias);
1465                return alias;
1466        }
1467
1468        __d_instantiate(entry, inode);
1469        return NULL;
1470}
1471
1472struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1473{
1474        struct dentry *result;
1475
1476        BUG_ON(!hlist_unhashed(&entry->d_alias));
1477
1478        if (inode)
1479                spin_lock(&inode->i_lock);
1480        result = __d_instantiate_unique(entry, inode);
1481        if (inode)
1482                spin_unlock(&inode->i_lock);
1483
1484        if (!result) {
1485                security_d_instantiate(entry, inode);
1486                return NULL;
1487        }
1488
1489        BUG_ON(!d_unhashed(result));
1490        iput(inode);
1491        return result;
1492}
1493
1494EXPORT_SYMBOL(d_instantiate_unique);
1495
1496struct dentry *d_make_root(struct inode *root_inode)
1497{
1498        struct dentry *res = NULL;
1499
1500        if (root_inode) {
1501                static const struct qstr name = QSTR_INIT("/", 1);
1502
1503                res = __d_alloc(root_inode->i_sb, &name);
1504                if (res)
1505                        d_instantiate(res, root_inode);
1506                else
1507                        iput(root_inode);
1508        }
1509        return res;
1510}
1511EXPORT_SYMBOL(d_make_root);
1512
1513static struct dentry * __d_find_any_alias(struct inode *inode)
1514{
1515        struct dentry *alias;
1516
1517        if (hlist_empty(&inode->i_dentry))
1518                return NULL;
1519        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
1520        __dget(alias);
1521        return alias;
1522}
1523
1524/**
1525 * d_find_any_alias - find any alias for a given inode
1526 * @inode: inode to find an alias for
1527 *
1528 * If any aliases exist for the given inode, take and return a
1529 * reference for one of them.  If no aliases exist, return %NULL.
1530 */
1531struct dentry *d_find_any_alias(struct inode *inode)
1532{
1533        struct dentry *de;
1534
1535        spin_lock(&inode->i_lock);
1536        de = __d_find_any_alias(inode);
1537        spin_unlock(&inode->i_lock);
1538        return de;
1539}
1540EXPORT_SYMBOL(d_find_any_alias);
1541
1542/**
1543 * d_obtain_alias - find or allocate a dentry for a given inode
1544 * @inode: inode to allocate the dentry for
1545 *
1546 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1547 * similar open by handle operations.  The returned dentry may be anonymous,
1548 * or may have a full name (if the inode was already in the cache).
1549 *
1550 * When called on a directory inode, we must ensure that the inode only ever
1551 * has one dentry.  If a dentry is found, that is returned instead of
1552 * allocating a new one.
1553 *
1554 * On successful return, the reference to the inode has been transferred
1555 * to the dentry.  In case of an error the reference on the inode is released.
1556 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1557 * be passed in and will be the error will be propagate to the return value,
1558 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1559 */
1560struct dentry *d_obtain_alias(struct inode *inode)
1561{
1562        static const struct qstr anonstring = QSTR_INIT("/", 1);
1563        struct dentry *tmp;
1564        struct dentry *res;
1565
1566        if (!inode)
1567                return ERR_PTR(-ESTALE);
1568        if (IS_ERR(inode))
1569                return ERR_CAST(inode);
1570
1571        res = d_find_any_alias(inode);
1572        if (res)
1573                goto out_iput;
1574
1575        tmp = __d_alloc(inode->i_sb, &anonstring);
1576        if (!tmp) {
1577                res = ERR_PTR(-ENOMEM);
1578                goto out_iput;
1579        }
1580
1581        spin_lock(&inode->i_lock);
1582        res = __d_find_any_alias(inode);
1583        if (res) {
1584                spin_unlock(&inode->i_lock);
1585                dput(tmp);
1586                goto out_iput;
1587        }
1588
1589        /* attach a disconnected dentry */
1590        spin_lock(&tmp->d_lock);
1591        tmp->d_inode = inode;
1592        tmp->d_flags |= DCACHE_DISCONNECTED;
1593        hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1594        hlist_bl_lock(&tmp->d_sb->s_anon);
1595        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1596        hlist_bl_unlock(&tmp->d_sb->s_anon);
1597        spin_unlock(&tmp->d_lock);
1598        spin_unlock(&inode->i_lock);
1599        security_d_instantiate(tmp, inode);
1600
1601        return tmp;
1602
1603 out_iput:
1604        if (res && !IS_ERR(res))
1605                security_d_instantiate(res, inode);
1606        iput(inode);
1607        return res;
1608}
1609EXPORT_SYMBOL(d_obtain_alias);
1610
1611/**
1612 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1613 * @inode:  the inode which may have a disconnected dentry
1614 * @dentry: a negative dentry which we want to point to the inode.
1615 *
1616 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1617 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1618 * and return it, else simply d_add the inode to the dentry and return NULL.
1619 *
1620 * This is needed in the lookup routine of any filesystem that is exportable
1621 * (via knfsd) so that we can build dcache paths to directories effectively.
1622 *
1623 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1624 * is returned.  This matches the expected return value of ->lookup.
1625 *
1626 */
1627struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1628{
1629        struct dentry *new = NULL;
1630
1631        if (IS_ERR(inode))
1632                return ERR_CAST(inode);
1633
1634        if (inode && S_ISDIR(inode->i_mode)) {
1635                spin_lock(&inode->i_lock);
1636                new = __d_find_alias(inode, 1);
1637                if (new) {
1638                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1639                        spin_unlock(&inode->i_lock);
1640                        security_d_instantiate(new, inode);
1641                        d_move(new, dentry);
1642                        iput(inode);
1643                } else {
1644                        /* already taking inode->i_lock, so d_add() by hand */
1645                        __d_instantiate(dentry, inode);
1646                        spin_unlock(&inode->i_lock);
1647                        security_d_instantiate(dentry, inode);
1648                        d_rehash(dentry);
1649                }
1650        } else
1651                d_add(dentry, inode);
1652        return new;
1653}
1654EXPORT_SYMBOL(d_splice_alias);
1655
1656/**
1657 * d_add_ci - lookup or allocate new dentry with case-exact name
1658 * @inode:  the inode case-insensitive lookup has found
1659 * @dentry: the negative dentry that was passed to the parent's lookup func
1660 * @name:   the case-exact name to be associated with the returned dentry
1661 *
1662 * This is to avoid filling the dcache with case-insensitive names to the
1663 * same inode, only the actual correct case is stored in the dcache for
1664 * case-insensitive filesystems.
1665 *
1666 * For a case-insensitive lookup match and if the the case-exact dentry
1667 * already exists in in the dcache, use it and return it.
1668 *
1669 * If no entry exists with the exact case name, allocate new dentry with
1670 * the exact case, and return the spliced entry.
1671 */
1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1673                        struct qstr *name)
1674{
1675        int error;
1676        struct dentry *found;
1677        struct dentry *new;
1678
1679        /*
1680         * First check if a dentry matching the name already exists,
1681         * if not go ahead and create it now.
1682         */
1683        found = d_hash_and_lookup(dentry->d_parent, name);
1684        if (!found) {
1685                new = d_alloc(dentry->d_parent, name);
1686                if (!new) {
1687                        error = -ENOMEM;
1688                        goto err_out;
1689                }
1690
1691                found = d_splice_alias(inode, new);
1692                if (found) {
1693                        dput(new);
1694                        return found;
1695                }
1696                return new;
1697        }
1698
1699        /*
1700         * If a matching dentry exists, and it's not negative use it.
1701         *
1702         * Decrement the reference count to balance the iget() done
1703         * earlier on.
1704         */
1705        if (found->d_inode) {
1706                if (unlikely(found->d_inode != inode)) {
1707                        /* This can't happen because bad inodes are unhashed. */
1708                        BUG_ON(!is_bad_inode(inode));
1709                        BUG_ON(!is_bad_inode(found->d_inode));
1710                }
1711                iput(inode);
1712                return found;
1713        }
1714
1715        /*
1716         * Negative dentry: instantiate it unless the inode is a directory and
1717         * already has a dentry.
1718         */
1719        new = d_splice_alias(inode, found);
1720        if (new) {
1721                dput(found);
1722                found = new;
1723        }
1724        return found;
1725
1726err_out:
1727        iput(inode);
1728        return ERR_PTR(error);
1729}
1730EXPORT_SYMBOL(d_add_ci);
1731
1732/*
1733 * Do the slow-case of the dentry name compare.
1734 *
1735 * Unlike the dentry_cmp() function, we need to atomically
1736 * load the name, length and inode information, so that the
1737 * filesystem can rely on them, and can use the 'name' and
1738 * 'len' information without worrying about walking off the
1739 * end of memory etc.
1740 *
1741 * Thus the read_seqcount_retry() and the "duplicate" info
1742 * in arguments (the low-level filesystem should not look
1743 * at the dentry inode or name contents directly, since
1744 * rename can change them while we're in RCU mode).
1745 */
1746enum slow_d_compare {
1747        D_COMP_OK,
1748        D_COMP_NOMATCH,
1749        D_COMP_SEQRETRY,
1750};
1751
1752static noinline enum slow_d_compare slow_dentry_cmp(
1753                const struct dentry *parent,
1754                struct inode *inode,
1755                struct dentry *dentry,
1756                unsigned int seq,
1757                const struct qstr *name)
1758{
1759        int tlen = dentry->d_name.len;
1760        const char *tname = dentry->d_name.name;
1761        struct inode *i = dentry->d_inode;
1762
1763        if (read_seqcount_retry(&dentry->d_seq, seq)) {
1764                cpu_relax();
1765                return D_COMP_SEQRETRY;
1766        }
1767        if (parent->d_op->d_compare(parent, inode,
1768                                dentry, i,
1769                                tlen, tname, name))
1770                return D_COMP_NOMATCH;
1771        return D_COMP_OK;
1772}
1773
1774/**
1775 * __d_lookup_rcu - search for a dentry (racy, store-free)
1776 * @parent: parent dentry
1777 * @name: qstr of name we wish to find
1778 * @seqp: returns d_seq value at the point where the dentry was found
1779 * @inode: returns dentry->d_inode when the inode was found valid.
1780 * Returns: dentry, or NULL
1781 *
1782 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1783 * resolution (store-free path walking) design described in
1784 * Documentation/filesystems/path-lookup.txt.
1785 *
1786 * This is not to be used outside core vfs.
1787 *
1788 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1789 * held, and rcu_read_lock held. The returned dentry must not be stored into
1790 * without taking d_lock and checking d_seq sequence count against @seq
1791 * returned here.
1792 *
1793 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1794 * function.
1795 *
1796 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1797 * the returned dentry, so long as its parent's seqlock is checked after the
1798 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1799 * is formed, giving integrity down the path walk.
1800 *
1801 * NOTE! The caller *has* to check the resulting dentry against the sequence
1802 * number we've returned before using any of the resulting dentry state!
1803 */
1804struct dentry *__d_lookup_rcu(const struct dentry *parent,
1805                                const struct qstr *name,
1806                                unsigned *seqp, struct inode *inode)
1807{
1808        u64 hashlen = name->hash_len;
1809        const unsigned char *str = name->name;
1810        struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1811        struct hlist_bl_node *node;
1812        struct dentry *dentry;
1813
1814        /*
1815         * Note: There is significant duplication with __d_lookup_rcu which is
1816         * required to prevent single threaded performance regressions
1817         * especially on architectures where smp_rmb (in seqcounts) are costly.
1818         * Keep the two functions in sync.
1819         */
1820
1821        /*
1822         * The hash list is protected using RCU.
1823         *
1824         * Carefully use d_seq when comparing a candidate dentry, to avoid
1825         * races with d_move().
1826         *
1827         * It is possible that concurrent renames can mess up our list
1828         * walk here and result in missing our dentry, resulting in the
1829         * false-negative result. d_lookup() protects against concurrent
1830         * renames using rename_lock seqlock.
1831         *
1832         * See Documentation/filesystems/path-lookup.txt for more details.
1833         */
1834        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1835                unsigned seq;
1836
1837seqretry:
1838                /*
1839                 * The dentry sequence count protects us from concurrent
1840                 * renames, and thus protects inode, parent and name fields.
1841                 *
1842                 * The caller must perform a seqcount check in order
1843                 * to do anything useful with the returned dentry,
1844                 * including using the 'd_inode' pointer.
1845                 *
1846                 * NOTE! We do a "raw" seqcount_begin here. That means that
1847                 * we don't wait for the sequence count to stabilize if it
1848                 * is in the middle of a sequence change. If we do the slow
1849                 * dentry compare, we will do seqretries until it is stable,
1850                 * and if we end up with a successful lookup, we actually
1851                 * want to exit RCU lookup anyway.
1852                 */
1853                seq = raw_seqcount_begin(&dentry->d_seq);
1854                if (dentry->d_parent != parent)
1855                        continue;
1856                if (d_unhashed(dentry))
1857                        continue;
1858                *seqp = seq;
1859
1860                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1861                        if (dentry->d_name.hash != hashlen_hash(hashlen))
1862                                continue;
1863                        switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
1864                        case D_COMP_OK:
1865                                return dentry;
1866                        case D_COMP_NOMATCH:
1867                                continue;
1868                        default:
1869                                goto seqretry;
1870                        }
1871                }
1872
1873                if (dentry->d_name.hash_len != hashlen)
1874                        continue;
1875                if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1876                        return dentry;
1877        }
1878        return NULL;
1879}
1880
1881/**
1882 * d_lookup - search for a dentry
1883 * @parent: parent dentry
1884 * @name: qstr of name we wish to find
1885 * Returns: dentry, or NULL
1886 *
1887 * d_lookup searches the children of the parent dentry for the name in
1888 * question. If the dentry is found its reference count is incremented and the
1889 * dentry is returned. The caller must use dput to free the entry when it has
1890 * finished using it. %NULL is returned if the dentry does not exist.
1891 */
1892struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1893{
1894        struct dentry *dentry;
1895        unsigned seq;
1896
1897        do {
1898                seq = read_seqbegin(&rename_lock);
1899                dentry = __d_lookup(parent, name);
1900                if (dentry)
1901                        break;
1902        } while (read_seqretry(&rename_lock, seq));
1903        return dentry;
1904}
1905EXPORT_SYMBOL(d_lookup);
1906
1907/**
1908 * __d_lookup - search for a dentry (racy)
1909 * @parent: parent dentry
1910 * @name: qstr of name we wish to find
1911 * Returns: dentry, or NULL
1912 *
1913 * __d_lookup is like d_lookup, however it may (rarely) return a
1914 * false-negative result due to unrelated rename activity.
1915 *
1916 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1917 * however it must be used carefully, eg. with a following d_lookup in
1918 * the case of failure.
1919 *
1920 * __d_lookup callers must be commented.
1921 */
1922struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1923{
1924        unsigned int len = name->len;
1925        unsigned int hash = name->hash;
1926        const unsigned char *str = name->name;
1927        struct hlist_bl_head *b = d_hash(parent, hash);
1928        struct hlist_bl_node *node;
1929        struct dentry *found = NULL;
1930        struct dentry *dentry;
1931
1932        /*
1933         * Note: There is significant duplication with __d_lookup_rcu which is
1934         * required to prevent single threaded performance regressions
1935         * especially on architectures where smp_rmb (in seqcounts) are costly.
1936         * Keep the two functions in sync.
1937         */
1938
1939        /*
1940         * The hash list is protected using RCU.
1941         *
1942         * Take d_lock when comparing a candidate dentry, to avoid races
1943         * with d_move().
1944         *
1945         * It is possible that concurrent renames can mess up our list
1946         * walk here and result in missing our dentry, resulting in the
1947         * false-negative result. d_lookup() protects against concurrent
1948         * renames using rename_lock seqlock.
1949         *
1950         * See Documentation/filesystems/path-lookup.txt for more details.
1951         */
1952        rcu_read_lock();
1953        
1954        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1955
1956                if (dentry->d_name.hash != hash)
1957                        continue;
1958
1959                spin_lock(&dentry->d_lock);
1960                if (dentry->d_parent != parent)
1961                        goto next;
1962                if (d_unhashed(dentry))
1963                        goto next;
1964
1965                /*
1966                 * It is safe to compare names since d_move() cannot
1967                 * change the qstr (protected by d_lock).
1968                 */
1969                if (parent->d_flags & DCACHE_OP_COMPARE) {
1970                        int tlen = dentry->d_name.len;
1971                        const char *tname = dentry->d_name.name;
1972                        if (parent->d_op->d_compare(parent, parent->d_inode,
1973                                                dentry, dentry->d_inode,
1974                                                tlen, tname, name))
1975                                goto next;
1976                } else {
1977                        if (dentry->d_name.len != len)
1978                                goto next;
1979                        if (dentry_cmp(dentry, str, len))
1980                                goto next;
1981                }
1982
1983                dentry->d_count++;
1984                found = dentry;
1985                spin_unlock(&dentry->d_lock);
1986                break;
1987next:
1988                spin_unlock(&dentry->d_lock);
1989        }
1990        rcu_read_unlock();
1991
1992        return found;
1993}
1994
1995/**
1996 * d_hash_and_lookup - hash the qstr then search for a dentry
1997 * @dir: Directory to search in
1998 * @name: qstr of name we wish to find
1999 *
2000 * On hash failure or on lookup failure NULL is returned.
2001 */
2002struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2003{
2004        struct dentry *dentry = NULL;
2005
2006        /*
2007         * Check for a fs-specific hash function. Note that we must
2008         * calculate the standard hash first, as the d_op->d_hash()
2009         * routine may choose to leave the hash value unchanged.
2010         */
2011        name->hash = full_name_hash(name->name, name->len);
2012        if (dir->d_flags & DCACHE_OP_HASH) {
2013                if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
2014                        goto out;
2015        }
2016        dentry = d_lookup(dir, name);
2017out:
2018        return dentry;
2019}
2020
2021/**
2022 * d_validate - verify dentry provided from insecure source (deprecated)
2023 * @dentry: The dentry alleged to be valid child of @dparent
2024 * @dparent: The parent dentry (known to be valid)
2025 *
2026 * An insecure source has sent us a dentry, here we verify it and dget() it.
2027 * This is used by ncpfs in its readdir implementation.
2028 * Zero is returned in the dentry is invalid.
2029 *
2030 * This function is slow for big directories, and deprecated, do not use it.
2031 */
2032int d_validate(struct dentry *dentry, struct dentry *dparent)
2033{
2034        struct dentry *child;
2035
2036        spin_lock(&dparent->d_lock);
2037        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2038                if (dentry == child) {
2039                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2040                        __dget_dlock(dentry);
2041                        spin_unlock(&dentry->d_lock);
2042                        spin_unlock(&dparent->d_lock);
2043                        return 1;
2044                }
2045        }
2046        spin_unlock(&dparent->d_lock);
2047
2048        return 0;
2049}
2050EXPORT_SYMBOL(d_validate);
2051
2052/*
2053 * When a file is deleted, we have two options:
2054 * - turn this dentry into a negative dentry
2055 * - unhash this dentry and free it.
2056 *
2057 * Usually, we want to just turn this into
2058 * a negative dentry, but if anybody else is
2059 * currently using the dentry or the inode
2060 * we can't do that and we fall back on removing
2061 * it from the hash queues and waiting for
2062 * it to be deleted later when it has no users
2063 */
2064 
2065/**
2066 * d_delete - delete a dentry
2067 * @dentry: The dentry to delete
2068 *
2069 * Turn the dentry into a negative dentry if possible, otherwise
2070 * remove it from the hash queues so it can be deleted later
2071 */
2072 
2073void d_delete(struct dentry * dentry)
2074{
2075        struct inode *inode;
2076        int isdir = 0;
2077        /*
2078         * Are we the only user?
2079         */
2080again:
2081        spin_lock(&dentry->d_lock);
2082        inode = dentry->d_inode;
2083        isdir = S_ISDIR(inode->i_mode);
2084        if (dentry->d_count == 1) {
2085                if (!spin_trylock(&inode->i_lock)) {
2086                        spin_unlock(&dentry->d_lock);
2087                        cpu_relax();
2088                        goto again;
2089                }
2090                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2091                dentry_unlink_inode(dentry);
2092                fsnotify_nameremove(dentry, isdir);
2093                return;
2094        }
2095
2096        if (!d_unhashed(dentry))
2097                __d_drop(dentry);
2098
2099        spin_unlock(&dentry->d_lock);
2100
2101        fsnotify_nameremove(dentry, isdir);
2102}
2103EXPORT_SYMBOL(d_delete);
2104
2105static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2106{
2107        BUG_ON(!d_unhashed(entry));
2108        hlist_bl_lock(b);
2109        entry->d_flags |= DCACHE_RCUACCESS;
2110        hlist_bl_add_head_rcu(&entry->d_hash, b);
2111        hlist_bl_unlock(b);
2112}
2113
2114static void _d_rehash(struct dentry * entry)
2115{
2116        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2117}
2118
2119/**
2120 * d_rehash     - add an entry back to the hash
2121 * @entry: dentry to add to the hash
2122 *
2123 * Adds a dentry to the hash according to its name.
2124 */
2125 
2126void d_rehash(struct dentry * entry)
2127{
2128        spin_lock(&entry->d_lock);
2129        _d_rehash(entry);
2130        spin_unlock(&entry->d_lock);
2131}
2132EXPORT_SYMBOL(d_rehash);
2133
2134/**
2135 * dentry_update_name_case - update case insensitive dentry with a new name
2136 * @dentry: dentry to be updated
2137 * @name: new name
2138 *
2139 * Update a case insensitive dentry with new case of name.
2140 *
2141 * dentry must have been returned by d_lookup with name @name. Old and new
2142 * name lengths must match (ie. no d_compare which allows mismatched name
2143 * lengths).
2144 *
2145 * Parent inode i_mutex must be held over d_lookup and into this call (to
2146 * keep renames and concurrent inserts, and readdir(2) away).
2147 */
2148void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2149{
2150        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2151        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2152
2153        spin_lock(&dentry->d_lock);
2154        write_seqcount_begin(&dentry->d_seq);
2155        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2156        write_seqcount_end(&dentry->d_seq);
2157        spin_unlock(&dentry->d_lock);
2158}
2159EXPORT_SYMBOL(dentry_update_name_case);
2160
2161static void switch_names(struct dentry *dentry, struct dentry *target)
2162{
2163        if (dname_external(target)) {
2164                if (dname_external(dentry)) {
2165                        /*
2166                         * Both external: swap the pointers
2167                         */
2168                        swap(target->d_name.name, dentry->d_name.name);
2169                } else {
2170                        /*
2171                         * dentry:internal, target:external.  Steal target's
2172                         * storage and make target internal.
2173                         */
2174                        memcpy(target->d_iname, dentry->d_name.name,
2175                                        dentry->d_name.len + 1);
2176                        dentry->d_name.name = target->d_name.name;
2177                        target->d_name.name = target->d_iname;
2178                }
2179        } else {
2180                if (dname_external(dentry)) {
2181                        /*
2182                         * dentry:external, target:internal.  Give dentry's
2183                         * storage to target and make dentry internal
2184                         */
2185                        memcpy(dentry->d_iname, target->d_name.name,
2186                                        target->d_name.len + 1);
2187                        target->d_name.name = dentry->d_name.name;
2188                        dentry->d_name.name = dentry->d_iname;
2189                } else {
2190                        /*
2191                         * Both are internal.  Just copy target to dentry
2192                         */
2193                        memcpy(dentry->d_iname, target->d_name.name,
2194                                        target->d_name.len + 1);
2195                        dentry->d_name.len = target->d_name.len;
2196                        return;
2197                }
2198        }
2199        swap(dentry->d_name.len, target->d_name.len);
2200}
2201
2202static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2203{
2204        /*
2205         * XXXX: do we really need to take target->d_lock?
2206         */
2207        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2208                spin_lock(&target->d_parent->d_lock);
2209        else {
2210                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2211                        spin_lock(&dentry->d_parent->d_lock);
2212                        spin_lock_nested(&target->d_parent->d_lock,
2213                                                DENTRY_D_LOCK_NESTED);
2214                } else {
2215                        spin_lock(&target->d_parent->d_lock);
2216                        spin_lock_nested(&dentry->d_parent->d_lock,
2217                                                DENTRY_D_LOCK_NESTED);
2218                }
2219        }
2220        if (target < dentry) {
2221                spin_lock_nested(&target->d_lock, 2);
2222                spin_lock_nested(&dentry->d_lock, 3);
2223        } else {
2224                spin_lock_nested(&dentry->d_lock, 2);
2225                spin_lock_nested(&target->d_lock, 3);
2226        }
2227}
2228
2229static void dentry_unlock_parents_for_move(struct dentry *dentry,
2230                                        struct dentry *target)
2231{
2232        if (target->d_parent != dentry->d_parent)
2233                spin_unlock(&dentry->d_parent->d_lock);
2234        if (target->d_parent != target)
2235                spin_unlock(&target->d_parent->d_lock);
2236}
2237
2238/*
2239 * When switching names, the actual string doesn't strictly have to
2240 * be preserved in the target - because we're dropping the target
2241 * anyway. As such, we can just do a simple memcpy() to copy over
2242 * the new name before we switch.
2243 *
2244 * Note that we have to be a lot more careful about getting the hash
2245 * switched - we have to switch the hash value properly even if it
2246 * then no longer matches the actual (corrupted) string of the target.
2247 * The hash value has to match the hash queue that the dentry is on..
2248 */
2249/*
2250 * __d_move - move a dentry
2251 * @dentry: entry to move
2252 * @target: new dentry
2253 *
2254 * Update the dcache to reflect the move of a file name. Negative
2255 * dcache entries should not be moved in this way. Caller must hold
2256 * rename_lock, the i_mutex of the source and target directories,
2257 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2258 */
2259static void __d_move(struct dentry * dentry, struct dentry * target)
2260{
2261        if (!dentry->d_inode)
2262                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2263
2264        BUG_ON(d_ancestor(dentry, target));
2265        BUG_ON(d_ancestor(target, dentry));
2266
2267        dentry_lock_for_move(dentry, target);
2268
2269        write_seqcount_begin(&dentry->d_seq);
2270        write_seqcount_begin(&target->d_seq);
2271
2272        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2273
2274        /*
2275         * Move the dentry to the target hash queue. Don't bother checking
2276         * for the same hash queue because of how unlikely it is.
2277         */
2278        __d_drop(dentry);
2279        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2280
2281        /* Unhash the target: dput() will then get rid of it */
2282        __d_drop(target);
2283
2284        list_del(&dentry->d_u.d_child);
2285        list_del(&target->d_u.d_child);
2286
2287        /* Switch the names.. */
2288        switch_names(dentry, target);
2289        swap(dentry->d_name.hash, target->d_name.hash);
2290
2291        /* ... and switch the parents */
2292        if (IS_ROOT(dentry)) {
2293                dentry->d_parent = target->d_parent;
2294                target->d_parent = target;
2295                INIT_LIST_HEAD(&target->d_u.d_child);
2296        } else {
2297                swap(dentry->d_parent, target->d_parent);
2298
2299                /* And add them back to the (new) parent lists */
2300                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2301        }
2302
2303        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2304
2305        write_seqcount_end(&target->d_seq);
2306        write_seqcount_end(&dentry->d_seq);
2307
2308        dentry_unlock_parents_for_move(dentry, target);
2309        spin_unlock(&target->d_lock);
2310        fsnotify_d_move(dentry);
2311        spin_unlock(&dentry->d_lock);
2312}
2313
2314/*
2315 * d_move - move a dentry
2316 * @dentry: entry to move
2317 * @target: new dentry
2318 *
2319 * Update the dcache to reflect the move of a file name. Negative
2320 * dcache entries should not be moved in this way. See the locking
2321 * requirements for __d_move.
2322 */
2323void d_move(struct dentry *dentry, struct dentry *target)
2324{
2325        write_seqlock(&rename_lock);
2326        __d_move(dentry, target);
2327        write_sequnlock(&rename_lock);
2328}
2329EXPORT_SYMBOL(d_move);
2330
2331/**
2332 * d_ancestor - search for an ancestor
2333 * @p1: ancestor dentry
2334 * @p2: child dentry
2335 *
2336 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2337 * an ancestor of p2, else NULL.
2338 */
2339struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2340{
2341        struct dentry *p;
2342
2343        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2344                if (p->d_parent == p1)
2345                        return p;
2346        }
2347        return NULL;
2348}
2349
2350/*
2351 * This helper attempts to cope with remotely renamed directories
2352 *
2353 * It assumes that the caller is already holding
2354 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2355 *
2356 * Note: If ever the locking in lock_rename() changes, then please
2357 * remember to update this too...
2358 */
2359static struct dentry *__d_unalias(struct inode *inode,
2360                struct dentry *dentry, struct dentry *alias)
2361{
2362        struct mutex *m1 = NULL, *m2 = NULL;
2363        struct dentry *ret = ERR_PTR(-EBUSY);
2364
2365        /* If alias and dentry share a parent, then no extra locks required */
2366        if (alias->d_parent == dentry->d_parent)
2367                goto out_unalias;
2368
2369        /* See lock_rename() */
2370        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2371                goto out_err;
2372        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2373        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2374                goto out_err;
2375        m2 = &alias->d_parent->d_inode->i_mutex;
2376out_unalias:
2377        if (likely(!d_mountpoint(alias))) {
2378                __d_move(alias, dentry);
2379                ret = alias;
2380        }
2381out_err:
2382        spin_unlock(&inode->i_lock);
2383        if (m2)
2384                mutex_unlock(m2);
2385        if (m1)
2386                mutex_unlock(m1);
2387        return ret;
2388}
2389
2390/*
2391 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2392 * named dentry in place of the dentry to be replaced.
2393 * returns with anon->d_lock held!
2394 */
2395static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2396{
2397        struct dentry *dparent, *aparent;
2398
2399        dentry_lock_for_move(anon, dentry);
2400
2401        write_seqcount_begin(&dentry->d_seq);
2402        write_seqcount_begin(&anon->d_seq);
2403
2404        dparent = dentry->d_parent;
2405        aparent = anon->d_parent;
2406
2407        switch_names(dentry, anon);
2408        swap(dentry->d_name.hash, anon->d_name.hash);
2409
2410        dentry->d_parent = (aparent == anon) ? dentry : aparent;
2411        list_del(&dentry->d_u.d_child);
2412        if (!IS_ROOT(dentry))
2413                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2414        else
2415                INIT_LIST_HEAD(&dentry->d_u.d_child);
2416
2417        anon->d_parent = (dparent == dentry) ? anon : dparent;
2418        list_del(&anon->d_u.d_child);
2419        if (!IS_ROOT(anon))
2420                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2421        else
2422                INIT_LIST_HEAD(&anon->d_u.d_child);
2423
2424        write_seqcount_end(&dentry->d_seq);
2425        write_seqcount_end(&anon->d_seq);
2426
2427        dentry_unlock_parents_for_move(anon, dentry);
2428        spin_unlock(&dentry->d_lock);
2429
2430        /* anon->d_lock still locked, returns locked */
2431        anon->d_flags &= ~DCACHE_DISCONNECTED;
2432}
2433
2434/**
2435 * d_materialise_unique - introduce an inode into the tree
2436 * @dentry: candidate dentry
2437 * @inode: inode to bind to the dentry, to which aliases may be attached
2438 *
2439 * Introduces an dentry into the tree, substituting an extant disconnected
2440 * root directory alias in its place if there is one. Caller must hold the
2441 * i_mutex of the parent directory.
2442 */
2443struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2444{
2445        struct dentry *actual;
2446
2447        BUG_ON(!d_unhashed(dentry));
2448
2449        if (!inode) {
2450                actual = dentry;
2451                __d_instantiate(dentry, NULL);
2452                d_rehash(actual);
2453                goto out_nolock;
2454        }
2455
2456        spin_lock(&inode->i_lock);
2457
2458        if (S_ISDIR(inode->i_mode)) {
2459                struct dentry *alias;
2460
2461                /* Does an aliased dentry already exist? */
2462                alias = __d_find_alias(inode, 0);
2463                if (alias) {
2464                        actual = alias;
2465                        write_seqlock(&rename_lock);
2466
2467                        if (d_ancestor(alias, dentry)) {
2468                                /* Check for loops */
2469                                actual = ERR_PTR(-ELOOP);
2470                                spin_unlock(&inode->i_lock);
2471                        } else if (IS_ROOT(alias)) {
2472                                /* Is this an anonymous mountpoint that we
2473                                 * could splice into our tree? */
2474                                __d_materialise_dentry(dentry, alias);
2475                                write_sequnlock(&rename_lock);
2476                                __d_drop(alias);
2477                                goto found;
2478                        } else {
2479                                /* Nope, but we must(!) avoid directory
2480                                 * aliasing. This drops inode->i_lock */
2481                                actual = __d_unalias(inode, dentry, alias);
2482                        }
2483                        write_sequnlock(&rename_lock);
2484                        if (IS_ERR(actual)) {
2485                                if (PTR_ERR(actual) == -ELOOP)
2486                                        pr_warn_ratelimited(
2487                                                "VFS: Lookup of '%s' in %s %s"
2488                                                " would have caused loop\n",
2489                                                dentry->d_name.name,
2490                                                inode->i_sb->s_type->name,
2491                                                inode->i_sb->s_id);
2492                                dput(alias);
2493                        }
2494                        goto out_nolock;
2495                }
2496        }
2497
2498        /* Add a unique reference */
2499        actual = __d_instantiate_unique(dentry, inode);
2500        if (!actual)
2501                actual = dentry;
2502        else
2503                BUG_ON(!d_unhashed(actual));
2504
2505        spin_lock(&actual->d_lock);
2506found:
2507        _d_rehash(actual);
2508        spin_unlock(&actual->d_lock);
2509        spin_unlock(&inode->i_lock);
2510out_nolock:
2511        if (actual == dentry) {
2512                security_d_instantiate(dentry, inode);
2513                return NULL;
2514        }
2515
2516        iput(inode);
2517        return actual;
2518}
2519EXPORT_SYMBOL_GPL(d_materialise_unique);
2520
2521static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2522{
2523        *buflen -= namelen;
2524        if (*buflen < 0)
2525                return -ENAMETOOLONG;
2526        *buffer -= namelen;
2527        memcpy(*buffer, str, namelen);
2528        return 0;
2529}
2530
2531static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2532{
2533        return prepend(buffer, buflen, name->name, name->len);
2534}
2535
2536/**
2537 * prepend_path - Prepend path string to a buffer
2538 * @path: the dentry/vfsmount to report
2539 * @root: root vfsmnt/dentry
2540 * @buffer: pointer to the end of the buffer
2541 * @buflen: pointer to buffer length
2542 *
2543 * Caller holds the rename_lock.
2544 */
2545static int prepend_path(const struct path *path,
2546                        const struct path *root,
2547                        char **buffer, int *buflen)
2548{
2549        struct dentry *dentry = path->dentry;
2550        struct vfsmount *vfsmnt = path->mnt;
2551        struct mount *mnt = real_mount(vfsmnt);
2552        bool slash = false;
2553        int error = 0;
2554
2555        br_read_lock(&vfsmount_lock);
2556        while (dentry != root->dentry || vfsmnt != root->mnt) {
2557                struct dentry * parent;
2558
2559                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2560                        /* Global root? */
2561                        if (!mnt_has_parent(mnt))
2562                                goto global_root;
2563                        dentry = mnt->mnt_mountpoint;
2564                        mnt = mnt->mnt_parent;
2565                        vfsmnt = &mnt->mnt;
2566                        continue;
2567                }
2568                parent = dentry->d_parent;
2569                prefetch(parent);
2570                spin_lock(&dentry->d_lock);
2571                error = prepend_name(buffer, buflen, &dentry->d_name);
2572                spin_unlock(&dentry->d_lock);
2573                if (!error)
2574                        error = prepend(buffer, buflen, "/", 1);
2575                if (error)
2576                        break;
2577
2578                slash = true;
2579                dentry = parent;
2580        }
2581
2582        if (!error && !slash)
2583                error = prepend(buffer, buflen, "/", 1);
2584
2585out:
2586        br_read_unlock(&vfsmount_lock);
2587        return error;
2588
2589global_root:
2590        /*
2591         * Filesystems needing to implement special "root names"
2592         * should do so with ->d_dname()
2593         */
2594        if (IS_ROOT(dentry) &&
2595            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2596                WARN(1, "Root dentry has weird name <%.*s>\n",
2597                     (int) dentry->d_name.len, dentry->d_name.name);
2598        }
2599        if (!slash)
2600                error = prepend(buffer, buflen, "/", 1);
2601        if (!error)
2602                error = is_mounted(vfsmnt) ? 1 : 2;
2603        goto out;
2604}
2605
2606/**
2607 * __d_path - return the path of a dentry
2608 * @path: the dentry/vfsmount to report
2609 * @root: root vfsmnt/dentry
2610 * @buf: buffer to return value in
2611 * @buflen: buffer length
2612 *
2613 * Convert a dentry into an ASCII path name.
2614 *
2615 * Returns a pointer into the buffer or an error code if the
2616 * path was too long.
2617 *
2618 * "buflen" should be positive.
2619 *
2620 * If the path is not reachable from the supplied root, return %NULL.
2621 */
2622char *__d_path(const struct path *path,
2623               const struct path *root,
2624               char *buf, int buflen)
2625{
2626        char *res = buf + buflen;
2627        int error;
2628
2629        prepend(&res, &buflen, "\0", 1);
2630        write_seqlock(&rename_lock);
2631        error = prepend_path(path, root, &res, &buflen);
2632        write_sequnlock(&rename_lock);
2633
2634        if (error < 0)
2635                return ERR_PTR(error);
2636        if (error > 0)
2637                return NULL;
2638        return res;
2639}
2640
2641char *d_absolute_path(const struct path *path,
2642               char *buf, int buflen)
2643{
2644        struct path root = {};
2645        char *res = buf + buflen;
2646        int error;
2647
2648        prepend(&res, &buflen, "\0", 1);
2649        write_seqlock(&rename_lock);
2650        error = prepend_path(path, &root, &res, &buflen);
2651        write_sequnlock(&rename_lock);
2652
2653        if (error > 1)
2654                error = -EINVAL;
2655        if (error < 0)
2656                return ERR_PTR(error);
2657        return res;
2658}
2659
2660/*
2661 * same as __d_path but appends "(deleted)" for unlinked files.
2662 */
2663static int path_with_deleted(const struct path *path,
2664                             const struct path *root,
2665                             char **buf, int *buflen)
2666{
2667        prepend(buf, buflen, "\0", 1);
2668        if (d_unlinked(path->dentry)) {
2669                int error = prepend(buf, buflen, " (deleted)", 10);
2670                if (error)
2671                        return error;
2672        }
2673
2674        return prepend_path(path, root, buf, buflen);
2675}
2676
2677static int prepend_unreachable(char **buffer, int *buflen)
2678{
2679        return prepend(buffer, buflen, "(unreachable)", 13);
2680}
2681
2682/**
2683 * d_path - return the path of a dentry
2684 * @path: path to report
2685 * @buf: buffer to return value in
2686 * @buflen: buffer length
2687 *
2688 * Convert a dentry into an ASCII path name. If the entry has been deleted
2689 * the string " (deleted)" is appended. Note that this is ambiguous.
2690 *
2691 * Returns a pointer into the buffer or an error code if the path was
2692 * too long. Note: Callers should use the returned pointer, not the passed
2693 * in buffer, to use the name! The implementation often starts at an offset
2694 * into the buffer, and may leave 0 bytes at the start.
2695 *
2696 * "buflen" should be positive.
2697 */
2698char *d_path(const struct path *path, char *buf, int buflen)
2699{
2700        char *res = buf + buflen;
2701        struct path root;
2702        int error;
2703
2704        /*
2705         * We have various synthetic filesystems that never get mounted.  On
2706         * these filesystems dentries are never used for lookup purposes, and
2707         * thus don't need to be hashed.  They also don't need a name until a
2708         * user wants to identify the object in /proc/pid/fd/.  The little hack
2709         * below allows us to generate a name for these objects on demand:
2710         */
2711        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2712                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2713
2714        get_fs_root(current->fs, &root);
2715        write_seqlock(&rename_lock);
2716        error = path_with_deleted(path, &root, &res, &buflen);
2717        if (error < 0)
2718                res = ERR_PTR(error);
2719        write_sequnlock(&rename_lock);
2720        path_put(&root);
2721        return res;
2722}
2723EXPORT_SYMBOL(d_path);
2724
2725/**
2726 * d_path_with_unreachable - return the path of a dentry
2727 * @path: path to report
2728 * @buf: buffer to return value in
2729 * @buflen: buffer length
2730 *
2731 * The difference from d_path() is that this prepends "(unreachable)"
2732 * to paths which are unreachable from the current process' root.
2733 */
2734char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2735{
2736        char *res = buf + buflen;
2737        struct path root;
2738        int error;
2739
2740        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2741                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2742
2743        get_fs_root(current->fs, &root);
2744        write_seqlock(&rename_lock);
2745        error = path_with_deleted(path, &root, &res, &buflen);
2746        if (error > 0)
2747                error = prepend_unreachable(&res, &buflen);
2748        write_sequnlock(&rename_lock);
2749        path_put(&root);
2750        if (error)
2751                res =  ERR_PTR(error);
2752
2753        return res;
2754}
2755
2756/*
2757 * Helper function for dentry_operations.d_dname() members
2758 */
2759char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2760                        const char *fmt, ...)
2761{
2762        va_list args;
2763        char temp[64];
2764        int sz;
2765
2766        va_start(args, fmt);
2767        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2768        va_end(args);
2769
2770        if (sz > sizeof(temp) || sz > buflen)
2771                return ERR_PTR(-ENAMETOOLONG);
2772
2773        buffer += buflen - sz;
2774        return memcpy(buffer, temp, sz);
2775}
2776
2777/*
2778 * Write full pathname from the root of the filesystem into the buffer.
2779 */
2780static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2781{
2782        char *end = buf + buflen;
2783        char *retval;
2784
2785        prepend(&end, &buflen, "\0", 1);
2786        if (buflen < 1)
2787                goto Elong;
2788        /* Get '/' right */
2789        retval = end-1;
2790        *retval = '/';
2791
2792        while (!IS_ROOT(dentry)) {
2793                struct dentry *parent = dentry->d_parent;
2794                int error;
2795
2796                prefetch(parent);
2797                spin_lock(&dentry->d_lock);
2798                error = prepend_name(&end, &buflen, &dentry->d_name);
2799                spin_unlock(&dentry->d_lock);
2800                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2801                        goto Elong;
2802
2803                retval = end;
2804                dentry = parent;
2805        }
2806        return retval;
2807Elong:
2808        return ERR_PTR(-ENAMETOOLONG);
2809}
2810
2811char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2812{
2813        char *retval;
2814
2815        write_seqlock(&rename_lock);
2816        retval = __dentry_path(dentry, buf, buflen);
2817        write_sequnlock(&rename_lock);
2818
2819        return retval;
2820}
2821EXPORT_SYMBOL(dentry_path_raw);
2822
2823char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2824{
2825        char *p = NULL;
2826        char *retval;
2827
2828        write_seqlock(&rename_lock);
2829        if (d_unlinked(dentry)) {
2830                p = buf + buflen;
2831                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2832                        goto Elong;
2833                buflen++;
2834        }
2835        retval = __dentry_path(dentry, buf, buflen);
2836        write_sequnlock(&rename_lock);
2837        if (!IS_ERR(retval) && p)
2838                *p = '/';       /* restore '/' overriden with '\0' */
2839        return retval;
2840Elong:
2841        return ERR_PTR(-ENAMETOOLONG);
2842}
2843
2844/*
2845 * NOTE! The user-level library version returns a
2846 * character pointer. The kernel system call just
2847 * returns the length of the buffer filled (which
2848 * includes the ending '\0' character), or a negative
2849 * error value. So libc would do something like
2850 *
2851 *      char *getcwd(char * buf, size_t size)
2852 *      {
2853 *              int retval;
2854 *
2855 *              retval = sys_getcwd(buf, size);
2856 *              if (retval >= 0)
2857 *                      return buf;
2858 *              errno = -retval;
2859 *              return NULL;
2860 *      }
2861 */
2862SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2863{
2864        int error;
2865        struct path pwd, root;
2866        char *page = (char *) __get_free_page(GFP_USER);
2867
2868        if (!page)
2869                return -ENOMEM;
2870
2871        get_fs_root_and_pwd(current->fs, &root, &pwd);
2872
2873        error = -ENOENT;
2874        write_seqlock(&rename_lock);
2875        if (!d_unlinked(pwd.dentry)) {
2876                unsigned long len;
2877                char *cwd = page + PAGE_SIZE;
2878                int buflen = PAGE_SIZE;
2879
2880                prepend(&cwd, &buflen, "\0", 1);
2881                error = prepend_path(&pwd, &root, &cwd, &buflen);
2882                write_sequnlock(&rename_lock);
2883
2884                if (error < 0)
2885                        goto out;
2886
2887                /* Unreachable from current root */
2888                if (error > 0) {
2889                        error = prepend_unreachable(&cwd, &buflen);
2890                        if (error)
2891                                goto out;
2892                }
2893
2894                error = -ERANGE;
2895                len = PAGE_SIZE + page - cwd;
2896                if (len <= size) {
2897                        error = len;
2898                        if (copy_to_user(buf, cwd, len))
2899                                error = -EFAULT;
2900                }
2901        } else {
2902                write_sequnlock(&rename_lock);
2903        }
2904
2905out:
2906        path_put(&pwd);
2907        path_put(&root);
2908        free_page((unsigned long) page);
2909        return error;
2910}
2911
2912/*
2913 * Test whether new_dentry is a subdirectory of old_dentry.
2914 *
2915 * Trivially implemented using the dcache structure
2916 */
2917
2918/**
2919 * is_subdir - is new dentry a subdirectory of old_dentry
2920 * @new_dentry: new dentry
2921 * @old_dentry: old dentry
2922 *
2923 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2924 * Returns 0 otherwise.
2925 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2926 */
2927  
2928int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2929{
2930        int result;
2931        unsigned seq;
2932
2933        if (new_dentry == old_dentry)
2934                return 1;
2935
2936        do {
2937                /* for restarting inner loop in case of seq retry */
2938                seq = read_seqbegin(&rename_lock);
2939                /*
2940                 * Need rcu_readlock to protect against the d_parent trashing
2941                 * due to d_move
2942                 */
2943                rcu_read_lock();
2944                if (d_ancestor(old_dentry, new_dentry))
2945                        result = 1;
2946                else
2947                        result = 0;
2948                rcu_read_unlock();
2949        } while (read_seqretry(&rename_lock, seq));
2950
2951        return result;
2952}
2953
2954void d_genocide(struct dentry *root)
2955{
2956        struct dentry *this_parent;
2957        struct list_head *next;
2958        unsigned seq;
2959        int locked = 0;
2960
2961        seq = read_seqbegin(&rename_lock);
2962again:
2963        this_parent = root;
2964        spin_lock(&this_parent->d_lock);
2965repeat:
2966        next = this_parent->d_subdirs.next;
2967resume:
2968        while (next != &this_parent->d_subdirs) {
2969                struct list_head *tmp = next;
2970                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2971                next = tmp->next;
2972
2973                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2974                if (d_unhashed(dentry) || !dentry->d_inode) {
2975                        spin_unlock(&dentry->d_lock);
2976                        continue;
2977                }
2978                if (!list_empty(&dentry->d_subdirs)) {
2979                        spin_unlock(&this_parent->d_lock);
2980                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2981                        this_parent = dentry;
2982                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2983                        goto repeat;
2984                }
2985                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2986                        dentry->d_flags |= DCACHE_GENOCIDE;
2987                        dentry->d_count--;
2988                }
2989                spin_unlock(&dentry->d_lock);
2990        }
2991        if (this_parent != root) {
2992                struct dentry *child = this_parent;
2993                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2994                        this_parent->d_flags |= DCACHE_GENOCIDE;
2995                        this_parent->d_count--;
2996                }
2997                this_parent = try_to_ascend(this_parent, locked, seq);
2998                if (!this_parent)
2999                        goto rename_retry;
3000                next = child->d_u.d_child.next;
3001                goto resume;
3002        }
3003        spin_unlock(&this_parent->d_lock);
3004        if (!locked && read_seqretry(&rename_lock, seq))
3005                goto rename_retry;
3006        if (locked)
3007                write_sequnlock(&rename_lock);
3008        return;
3009
3010rename_retry:
3011        if (locked)
3012                goto again;
3013        locked = 1;
3014        write_seqlock(&rename_lock);
3015        goto again;
3016}
3017
3018/**
3019 * find_inode_number - check for dentry with name
3020 * @dir: directory to check
3021 * @name: Name to find.
3022 *
3023 * Check whether a dentry already exists for the given name,
3024 * and return the inode number if it has an inode. Otherwise
3025 * 0 is returned.
3026 *
3027 * This routine is used to post-process directory listings for
3028 * filesystems using synthetic inode numbers, and is necessary
3029 * to keep getcwd() working.
3030 */
3031 
3032ino_t find_inode_number(struct dentry *dir, struct qstr *name)
3033{
3034        struct dentry * dentry;
3035        ino_t ino = 0;
3036
3037        dentry = d_hash_and_lookup(dir, name);
3038        if (dentry) {
3039                if (dentry->d_inode)
3040                        ino = dentry->d_inode->i_ino;
3041                dput(dentry);
3042        }
3043        return ino;
3044}
3045EXPORT_SYMBOL(find_inode_number);
3046
3047static __initdata unsigned long dhash_entries;
3048static int __init set_dhash_entries(char *str)
3049{
3050        if (!str)
3051                return 0;
3052        dhash_entries = simple_strtoul(str, &str, 0);
3053        return 1;
3054}
3055__setup("dhash_entries=", set_dhash_entries);
3056
3057static void __init dcache_init_early(void)
3058{
3059        unsigned int loop;
3060
3061        /* If hashes are distributed across NUMA nodes, defer
3062         * hash allocation until vmalloc space is available.
3063         */
3064        if (hashdist)
3065                return;
3066
3067        dentry_hashtable =
3068                alloc_large_system_hash("Dentry cache",
3069                                        sizeof(struct hlist_bl_head),
3070                                        dhash_entries,
3071                                        13,
3072                                        HASH_EARLY,
3073                                        &d_hash_shift,
3074                                        &d_hash_mask,
3075                                        0,
3076                                        0);
3077
3078        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3079                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3080}
3081
3082static void __init dcache_init(void)
3083{
3084        unsigned int loop;
3085
3086        /* 
3087         * A constructor could be added for stable state like the lists,
3088         * but it is probably not worth it because of the cache nature
3089         * of the dcache. 
3090         */
3091        dentry_cache = KMEM_CACHE(dentry,
3092                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3093
3094        /* Hash may have been set up in dcache_init_early */
3095        if (!hashdist)
3096                return;
3097
3098        dentry_hashtable =
3099                alloc_large_system_hash("Dentry cache",
3100                                        sizeof(struct hlist_bl_head),
3101                                        dhash_entries,
3102                                        13,
3103                                        0,
3104                                        &d_hash_shift,
3105                                        &d_hash_mask,
3106                                        0,
3107                                        0);
3108
3109        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3110                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3111}
3112
3113/* SLAB cache for __getname() consumers */
3114struct kmem_cache *names_cachep __read_mostly;
3115EXPORT_SYMBOL(names_cachep);
3116
3117EXPORT_SYMBOL(d_genocide);
3118
3119void __init vfs_caches_init_early(void)
3120{
3121        dcache_init_early();
3122        inode_init_early();
3123}
3124
3125void __init vfs_caches_init(unsigned long mempages)
3126{
3127        unsigned long reserve;
3128
3129        /* Base hash sizes on available memory, with a reserve equal to
3130           150% of current kernel size */
3131
3132        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3133        mempages -= reserve;
3134
3135        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3136                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3137
3138        dcache_init();
3139        inode_init();
3140        files_init(mempages);
3141        mnt_init();
3142        bdev_cache_init();
3143        chrdev_init();
3144}
3145
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.