linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/export.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41#include "mount.h"
  42
  43/*
  44 * Usage:
  45 * dcache->d_inode->i_lock protects:
  46 *   - i_dentry, d_alias, d_inode of aliases
  47 * dcache_hash_bucket lock protects:
  48 *   - the dcache hash table
  49 * s_anon bl list spinlock protects:
  50 *   - the s_anon list (see __d_drop)
  51 * dcache_lru_lock protects:
  52 *   - the dcache lru lists and counters
  53 * d_lock protects:
  54 *   - d_flags
  55 *   - d_name
  56 *   - d_lru
  57 *   - d_count
  58 *   - d_unhashed()
  59 *   - d_parent and d_subdirs
  60 *   - childrens' d_child and d_parent
  61 *   - d_alias, d_inode
  62 *
  63 * Ordering:
  64 * dentry->d_inode->i_lock
  65 *   dentry->d_lock
  66 *     dcache_lru_lock
  67 *     dcache_hash_bucket lock
  68 *     s_anon lock
  69 *
  70 * If there is an ancestor relationship:
  71 * dentry->d_parent->...->d_parent->d_lock
  72 *   ...
  73 *     dentry->d_parent->d_lock
  74 *       dentry->d_lock
  75 *
  76 * If no ancestor relationship:
  77 * if (dentry1 < dentry2)
  78 *   dentry1->d_lock
  79 *     dentry2->d_lock
  80 */
  81int sysctl_vfs_cache_pressure __read_mostly = 100;
  82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  83
  84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  86
  87EXPORT_SYMBOL(rename_lock);
  88
  89static struct kmem_cache *dentry_cache __read_mostly;
  90
  91/*
  92 * This is the single most critical data structure when it comes
  93 * to the dcache: the hashtable for lookups. Somebody should try
  94 * to make this good - I've just made it work.
  95 *
  96 * This hash-function tries to avoid losing too many bits of hash
  97 * information, yet avoid using a prime hash-size or similar.
  98 */
  99#define D_HASHBITS     d_hash_shift
 100#define D_HASHMASK     d_hash_mask
 101
 102static unsigned int d_hash_mask __read_mostly;
 103static unsigned int d_hash_shift __read_mostly;
 104
 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
 106
 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 108                                        unsigned int hash)
 109{
 110        hash += (unsigned long) parent / L1_CACHE_BYTES;
 111        hash = hash + (hash >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
 115/* Statistics gathering. */
 116struct dentry_stat_t dentry_stat = {
 117        .age_limit = 45,
 118};
 119
 120static DEFINE_PER_CPU(unsigned int, nr_dentry);
 121
 122#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 123static int get_nr_dentry(void)
 124{
 125        int i;
 126        int sum = 0;
 127        for_each_possible_cpu(i)
 128                sum += per_cpu(nr_dentry, i);
 129        return sum < 0 ? 0 : sum;
 130}
 131
 132int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 133                   size_t *lenp, loff_t *ppos)
 134{
 135        dentry_stat.nr_dentry = get_nr_dentry();
 136        return proc_dointvec(table, write, buffer, lenp, ppos);
 137}
 138#endif
 139
 140/*
 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 142 * The strings are both count bytes long, and count is non-zero.
 143 */
 144#ifdef CONFIG_DCACHE_WORD_ACCESS
 145
 146#include <asm/word-at-a-time.h>
 147/*
 148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 149 * aligned allocation for this particular component. We don't
 150 * strictly need the load_unaligned_zeropad() safety, but it
 151 * doesn't hurt either.
 152 *
 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 154 * need the careful unaligned handling.
 155 */
 156static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 157{
 158        unsigned long a,b,mask;
 159
 160        for (;;) {
 161                a = *(unsigned long *)cs;
 162                b = load_unaligned_zeropad(ct);
 163                if (tcount < sizeof(unsigned long))
 164                        break;
 165                if (unlikely(a != b))
 166                        return 1;
 167                cs += sizeof(unsigned long);
 168                ct += sizeof(unsigned long);
 169                tcount -= sizeof(unsigned long);
 170                if (!tcount)
 171                        return 0;
 172        }
 173        mask = ~(~0ul << tcount*8);
 174        return unlikely(!!((a ^ b) & mask));
 175}
 176
 177#else
 178
 179static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 180{
 181        do {
 182                if (*cs != *ct)
 183                        return 1;
 184                cs++;
 185                ct++;
 186                tcount--;
 187        } while (tcount);
 188        return 0;
 189}
 190
 191#endif
 192
 193static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 194{
 195        const unsigned char *cs;
 196        /*
 197         * Be careful about RCU walk racing with rename:
 198         * use ACCESS_ONCE to fetch the name pointer.
 199         *
 200         * NOTE! Even if a rename will mean that the length
 201         * was not loaded atomically, we don't care. The
 202         * RCU walk will check the sequence count eventually,
 203         * and catch it. And we won't overrun the buffer,
 204         * because we're reading the name pointer atomically,
 205         * and a dentry name is guaranteed to be properly
 206         * terminated with a NUL byte.
 207         *
 208         * End result: even if 'len' is wrong, we'll exit
 209         * early because the data cannot match (there can
 210         * be no NUL in the ct/tcount data)
 211         */
 212        cs = ACCESS_ONCE(dentry->d_name.name);
 213        smp_read_barrier_depends();
 214        return dentry_string_cmp(cs, ct, tcount);
 215}
 216
 217static void __d_free(struct rcu_head *head)
 218{
 219        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 220
 221        WARN_ON(!hlist_unhashed(&dentry->d_alias));
 222        if (dname_external(dentry))
 223                kfree(dentry->d_name.name);
 224        kmem_cache_free(dentry_cache, dentry); 
 225}
 226
 227/*
 228 * no locks, please.
 229 */
 230static void d_free(struct dentry *dentry)
 231{
 232        BUG_ON(dentry->d_count);
 233        this_cpu_dec(nr_dentry);
 234        if (dentry->d_op && dentry->d_op->d_release)
 235                dentry->d_op->d_release(dentry);
 236
 237        /* if dentry was never visible to RCU, immediate free is OK */
 238        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 239                __d_free(&dentry->d_u.d_rcu);
 240        else
 241                call_rcu(&dentry->d_u.d_rcu, __d_free);
 242}
 243
 244/**
 245 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 246 * @dentry: the target dentry
 247 * After this call, in-progress rcu-walk path lookup will fail. This
 248 * should be called after unhashing, and after changing d_inode (if
 249 * the dentry has not already been unhashed).
 250 */
 251static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 252{
 253        assert_spin_locked(&dentry->d_lock);
 254        /* Go through a barrier */
 255        write_seqcount_barrier(&dentry->d_seq);
 256}
 257
 258/*
 259 * Release the dentry's inode, using the filesystem
 260 * d_iput() operation if defined. Dentry has no refcount
 261 * and is unhashed.
 262 */
 263static void dentry_iput(struct dentry * dentry)
 264        __releases(dentry->d_lock)
 265        __releases(dentry->d_inode->i_lock)
 266{
 267        struct inode *inode = dentry->d_inode;
 268        if (inode) {
 269                dentry->d_inode = NULL;
 270                hlist_del_init(&dentry->d_alias);
 271                spin_unlock(&dentry->d_lock);
 272                spin_unlock(&inode->i_lock);
 273                if (!inode->i_nlink)
 274                        fsnotify_inoderemove(inode);
 275                if (dentry->d_op && dentry->d_op->d_iput)
 276                        dentry->d_op->d_iput(dentry, inode);
 277                else
 278                        iput(inode);
 279        } else {
 280                spin_unlock(&dentry->d_lock);
 281        }
 282}
 283
 284/*
 285 * Release the dentry's inode, using the filesystem
 286 * d_iput() operation if defined. dentry remains in-use.
 287 */
 288static void dentry_unlink_inode(struct dentry * dentry)
 289        __releases(dentry->d_lock)
 290        __releases(dentry->d_inode->i_lock)
 291{
 292        struct inode *inode = dentry->d_inode;
 293        dentry->d_inode = NULL;
 294        hlist_del_init(&dentry->d_alias);
 295        dentry_rcuwalk_barrier(dentry);
 296        spin_unlock(&dentry->d_lock);
 297        spin_unlock(&inode->i_lock);
 298        if (!inode->i_nlink)
 299                fsnotify_inoderemove(inode);
 300        if (dentry->d_op && dentry->d_op->d_iput)
 301                dentry->d_op->d_iput(dentry, inode);
 302        else
 303                iput(inode);
 304}
 305
 306/*
 307 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 308 */
 309static void dentry_lru_add(struct dentry *dentry)
 310{
 311        if (list_empty(&dentry->d_lru)) {
 312                spin_lock(&dcache_lru_lock);
 313                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 314                dentry->d_sb->s_nr_dentry_unused++;
 315                dentry_stat.nr_unused++;
 316                spin_unlock(&dcache_lru_lock);
 317        }
 318}
 319
 320static void __dentry_lru_del(struct dentry *dentry)
 321{
 322        list_del_init(&dentry->d_lru);
 323        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 324        dentry->d_sb->s_nr_dentry_unused--;
 325        dentry_stat.nr_unused--;
 326}
 327
 328/*
 329 * Remove a dentry with references from the LRU.
 330 */
 331static void dentry_lru_del(struct dentry *dentry)
 332{
 333        if (!list_empty(&dentry->d_lru)) {
 334                spin_lock(&dcache_lru_lock);
 335                __dentry_lru_del(dentry);
 336                spin_unlock(&dcache_lru_lock);
 337        }
 338}
 339
 340/*
 341 * Remove a dentry that is unreferenced and about to be pruned
 342 * (unhashed and destroyed) from the LRU, and inform the file system.
 343 * This wrapper should be called _prior_ to unhashing a victim dentry.
 344 */
 345static void dentry_lru_prune(struct dentry *dentry)
 346{
 347        if (!list_empty(&dentry->d_lru)) {
 348                if (dentry->d_flags & DCACHE_OP_PRUNE)
 349                        dentry->d_op->d_prune(dentry);
 350
 351                spin_lock(&dcache_lru_lock);
 352                __dentry_lru_del(dentry);
 353                spin_unlock(&dcache_lru_lock);
 354        }
 355}
 356
 357static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 358{
 359        spin_lock(&dcache_lru_lock);
 360        if (list_empty(&dentry->d_lru)) {
 361                list_add_tail(&dentry->d_lru, list);
 362                dentry->d_sb->s_nr_dentry_unused++;
 363                dentry_stat.nr_unused++;
 364        } else {
 365                list_move_tail(&dentry->d_lru, list);
 366        }
 367        spin_unlock(&dcache_lru_lock);
 368}
 369
 370/**
 371 * d_kill - kill dentry and return parent
 372 * @dentry: dentry to kill
 373 * @parent: parent dentry
 374 *
 375 * The dentry must already be unhashed and removed from the LRU.
 376 *
 377 * If this is the root of the dentry tree, return NULL.
 378 *
 379 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 380 * d_kill.
 381 */
 382static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 383        __releases(dentry->d_lock)
 384        __releases(parent->d_lock)
 385        __releases(dentry->d_inode->i_lock)
 386{
 387        list_del(&dentry->d_u.d_child);
 388        /*
 389         * Inform try_to_ascend() that we are no longer attached to the
 390         * dentry tree
 391         */
 392        dentry->d_flags |= DCACHE_DENTRY_KILLED;
 393        if (parent)
 394                spin_unlock(&parent->d_lock);
 395        dentry_iput(dentry);
 396        /*
 397         * dentry_iput drops the locks, at which point nobody (except
 398         * transient RCU lookups) can reach this dentry.
 399         */
 400        d_free(dentry);
 401        return parent;
 402}
 403
 404/*
 405 * Unhash a dentry without inserting an RCU walk barrier or checking that
 406 * dentry->d_lock is locked.  The caller must take care of that, if
 407 * appropriate.
 408 */
 409static void __d_shrink(struct dentry *dentry)
 410{
 411        if (!d_unhashed(dentry)) {
 412                struct hlist_bl_head *b;
 413                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 414                        b = &dentry->d_sb->s_anon;
 415                else
 416                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 417
 418                hlist_bl_lock(b);
 419                __hlist_bl_del(&dentry->d_hash);
 420                dentry->d_hash.pprev = NULL;
 421                hlist_bl_unlock(b);
 422        }
 423}
 424
 425/**
 426 * d_drop - drop a dentry
 427 * @dentry: dentry to drop
 428 *
 429 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 430 * be found through a VFS lookup any more. Note that this is different from
 431 * deleting the dentry - d_delete will try to mark the dentry negative if
 432 * possible, giving a successful _negative_ lookup, while d_drop will
 433 * just make the cache lookup fail.
 434 *
 435 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 436 * reason (NFS timeouts or autofs deletes).
 437 *
 438 * __d_drop requires dentry->d_lock.
 439 */
 440void __d_drop(struct dentry *dentry)
 441{
 442        if (!d_unhashed(dentry)) {
 443                __d_shrink(dentry);
 444                dentry_rcuwalk_barrier(dentry);
 445        }
 446}
 447EXPORT_SYMBOL(__d_drop);
 448
 449void d_drop(struct dentry *dentry)
 450{
 451        spin_lock(&dentry->d_lock);
 452        __d_drop(dentry);
 453        spin_unlock(&dentry->d_lock);
 454}
 455EXPORT_SYMBOL(d_drop);
 456
 457/*
 458 * Finish off a dentry we've decided to kill.
 459 * dentry->d_lock must be held, returns with it unlocked.
 460 * If ref is non-zero, then decrement the refcount too.
 461 * Returns dentry requiring refcount drop, or NULL if we're done.
 462 */
 463static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 464        __releases(dentry->d_lock)
 465{
 466        struct inode *inode;
 467        struct dentry *parent;
 468
 469        inode = dentry->d_inode;
 470        if (inode && !spin_trylock(&inode->i_lock)) {
 471relock:
 472                spin_unlock(&dentry->d_lock);
 473                cpu_relax();
 474                return dentry; /* try again with same dentry */
 475        }
 476        if (IS_ROOT(dentry))
 477                parent = NULL;
 478        else
 479                parent = dentry->d_parent;
 480        if (parent && !spin_trylock(&parent->d_lock)) {
 481                if (inode)
 482                        spin_unlock(&inode->i_lock);
 483                goto relock;
 484        }
 485
 486        if (ref)
 487                dentry->d_count--;
 488        /*
 489         * if dentry was on the d_lru list delete it from there.
 490         * inform the fs via d_prune that this dentry is about to be
 491         * unhashed and destroyed.
 492         */
 493        dentry_lru_prune(dentry);
 494        /* if it was on the hash then remove it */
 495        __d_drop(dentry);
 496        return d_kill(dentry, parent);
 497}
 498
 499/* 
 500 * This is dput
 501 *
 502 * This is complicated by the fact that we do not want to put
 503 * dentries that are no longer on any hash chain on the unused
 504 * list: we'd much rather just get rid of them immediately.
 505 *
 506 * However, that implies that we have to traverse the dentry
 507 * tree upwards to the parents which might _also_ now be
 508 * scheduled for deletion (it may have been only waiting for
 509 * its last child to go away).
 510 *
 511 * This tail recursion is done by hand as we don't want to depend
 512 * on the compiler to always get this right (gcc generally doesn't).
 513 * Real recursion would eat up our stack space.
 514 */
 515
 516/*
 517 * dput - release a dentry
 518 * @dentry: dentry to release 
 519 *
 520 * Release a dentry. This will drop the usage count and if appropriate
 521 * call the dentry unlink method as well as removing it from the queues and
 522 * releasing its resources. If the parent dentries were scheduled for release
 523 * they too may now get deleted.
 524 */
 525void dput(struct dentry *dentry)
 526{
 527        if (!dentry)
 528                return;
 529
 530repeat:
 531        if (dentry->d_count == 1)
 532                might_sleep();
 533        spin_lock(&dentry->d_lock);
 534        BUG_ON(!dentry->d_count);
 535        if (dentry->d_count > 1) {
 536                dentry->d_count--;
 537                spin_unlock(&dentry->d_lock);
 538                return;
 539        }
 540
 541        if (dentry->d_flags & DCACHE_OP_DELETE) {
 542                if (dentry->d_op->d_delete(dentry))
 543                        goto kill_it;
 544        }
 545
 546        /* Unreachable? Get rid of it */
 547        if (d_unhashed(dentry))
 548                goto kill_it;
 549
 550        dentry->d_flags |= DCACHE_REFERENCED;
 551        dentry_lru_add(dentry);
 552
 553        dentry->d_count--;
 554        spin_unlock(&dentry->d_lock);
 555        return;
 556
 557kill_it:
 558        dentry = dentry_kill(dentry, 1);
 559        if (dentry)
 560                goto repeat;
 561}
 562EXPORT_SYMBOL(dput);
 563
 564/**
 565 * d_invalidate - invalidate a dentry
 566 * @dentry: dentry to invalidate
 567 *
 568 * Try to invalidate the dentry if it turns out to be
 569 * possible. If there are other dentries that can be
 570 * reached through this one we can't delete it and we
 571 * return -EBUSY. On success we return 0.
 572 *
 573 * no dcache lock.
 574 */
 575 
 576int d_invalidate(struct dentry * dentry)
 577{
 578        /*
 579         * If it's already been dropped, return OK.
 580         */
 581        spin_lock(&dentry->d_lock);
 582        if (d_unhashed(dentry)) {
 583                spin_unlock(&dentry->d_lock);
 584                return 0;
 585        }
 586        /*
 587         * Check whether to do a partial shrink_dcache
 588         * to get rid of unused child entries.
 589         */
 590        if (!list_empty(&dentry->d_subdirs)) {
 591                spin_unlock(&dentry->d_lock);
 592                shrink_dcache_parent(dentry);
 593                spin_lock(&dentry->d_lock);
 594        }
 595
 596        /*
 597         * Somebody else still using it?
 598         *
 599         * If it's a directory, we can't drop it
 600         * for fear of somebody re-populating it
 601         * with children (even though dropping it
 602         * would make it unreachable from the root,
 603         * we might still populate it if it was a
 604         * working directory or similar).
 605         * We also need to leave mountpoints alone,
 606         * directory or not.
 607         */
 608        if (dentry->d_count > 1 && dentry->d_inode) {
 609                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 610                        spin_unlock(&dentry->d_lock);
 611                        return -EBUSY;
 612                }
 613        }
 614
 615        __d_drop(dentry);
 616        spin_unlock(&dentry->d_lock);
 617        return 0;
 618}
 619EXPORT_SYMBOL(d_invalidate);
 620
 621/* This must be called with d_lock held */
 622static inline void __dget_dlock(struct dentry *dentry)
 623{
 624        dentry->d_count++;
 625}
 626
 627static inline void __dget(struct dentry *dentry)
 628{
 629        spin_lock(&dentry->d_lock);
 630        __dget_dlock(dentry);
 631        spin_unlock(&dentry->d_lock);
 632}
 633
 634struct dentry *dget_parent(struct dentry *dentry)
 635{
 636        struct dentry *ret;
 637
 638repeat:
 639        /*
 640         * Don't need rcu_dereference because we re-check it was correct under
 641         * the lock.
 642         */
 643        rcu_read_lock();
 644        ret = dentry->d_parent;
 645        spin_lock(&ret->d_lock);
 646        if (unlikely(ret != dentry->d_parent)) {
 647                spin_unlock(&ret->d_lock);
 648                rcu_read_unlock();
 649                goto repeat;
 650        }
 651        rcu_read_unlock();
 652        BUG_ON(!ret->d_count);
 653        ret->d_count++;
 654        spin_unlock(&ret->d_lock);
 655        return ret;
 656}
 657EXPORT_SYMBOL(dget_parent);
 658
 659/**
 660 * d_find_alias - grab a hashed alias of inode
 661 * @inode: inode in question
 662 * @want_discon:  flag, used by d_splice_alias, to request
 663 *          that only a DISCONNECTED alias be returned.
 664 *
 665 * If inode has a hashed alias, or is a directory and has any alias,
 666 * acquire the reference to alias and return it. Otherwise return NULL.
 667 * Notice that if inode is a directory there can be only one alias and
 668 * it can be unhashed only if it has no children, or if it is the root
 669 * of a filesystem.
 670 *
 671 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 672 * any other hashed alias over that one unless @want_discon is set,
 673 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 674 */
 675static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 676{
 677        struct dentry *alias, *discon_alias;
 678
 679again:
 680        discon_alias = NULL;
 681        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 682                spin_lock(&alias->d_lock);
 683                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 684                        if (IS_ROOT(alias) &&
 685                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 686                                discon_alias = alias;
 687                        } else if (!want_discon) {
 688                                __dget_dlock(alias);
 689                                spin_unlock(&alias->d_lock);
 690                                return alias;
 691                        }
 692                }
 693                spin_unlock(&alias->d_lock);
 694        }
 695        if (discon_alias) {
 696                alias = discon_alias;
 697                spin_lock(&alias->d_lock);
 698                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 699                        if (IS_ROOT(alias) &&
 700                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 701                                __dget_dlock(alias);
 702                                spin_unlock(&alias->d_lock);
 703                                return alias;
 704                        }
 705                }
 706                spin_unlock(&alias->d_lock);
 707                goto again;
 708        }
 709        return NULL;
 710}
 711
 712struct dentry *d_find_alias(struct inode *inode)
 713{
 714        struct dentry *de = NULL;
 715
 716        if (!hlist_empty(&inode->i_dentry)) {
 717                spin_lock(&inode->i_lock);
 718                de = __d_find_alias(inode, 0);
 719                spin_unlock(&inode->i_lock);
 720        }
 721        return de;
 722}
 723EXPORT_SYMBOL(d_find_alias);
 724
 725/*
 726 *      Try to kill dentries associated with this inode.
 727 * WARNING: you must own a reference to inode.
 728 */
 729void d_prune_aliases(struct inode *inode)
 730{
 731        struct dentry *dentry;
 732restart:
 733        spin_lock(&inode->i_lock);
 734        hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 735                spin_lock(&dentry->d_lock);
 736                if (!dentry->d_count) {
 737                        __dget_dlock(dentry);
 738                        __d_drop(dentry);
 739                        spin_unlock(&dentry->d_lock);
 740                        spin_unlock(&inode->i_lock);
 741                        dput(dentry);
 742                        goto restart;
 743                }
 744                spin_unlock(&dentry->d_lock);
 745        }
 746        spin_unlock(&inode->i_lock);
 747}
 748EXPORT_SYMBOL(d_prune_aliases);
 749
 750/*
 751 * Try to throw away a dentry - free the inode, dput the parent.
 752 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 753 * Releases dentry->d_lock.
 754 *
 755 * This may fail if locks cannot be acquired no problem, just try again.
 756 */
 757static void try_prune_one_dentry(struct dentry *dentry)
 758        __releases(dentry->d_lock)
 759{
 760        struct dentry *parent;
 761
 762        parent = dentry_kill(dentry, 0);
 763        /*
 764         * If dentry_kill returns NULL, we have nothing more to do.
 765         * if it returns the same dentry, trylocks failed. In either
 766         * case, just loop again.
 767         *
 768         * Otherwise, we need to prune ancestors too. This is necessary
 769         * to prevent quadratic behavior of shrink_dcache_parent(), but
 770         * is also expected to be beneficial in reducing dentry cache
 771         * fragmentation.
 772         */
 773        if (!parent)
 774                return;
 775        if (parent == dentry)
 776                return;
 777
 778        /* Prune ancestors. */
 779        dentry = parent;
 780        while (dentry) {
 781                spin_lock(&dentry->d_lock);
 782                if (dentry->d_count > 1) {
 783                        dentry->d_count--;
 784                        spin_unlock(&dentry->d_lock);
 785                        return;
 786                }
 787                dentry = dentry_kill(dentry, 1);
 788        }
 789}
 790
 791static void shrink_dentry_list(struct list_head *list)
 792{
 793        struct dentry *dentry;
 794
 795        rcu_read_lock();
 796        for (;;) {
 797                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 798                if (&dentry->d_lru == list)
 799                        break; /* empty */
 800                spin_lock(&dentry->d_lock);
 801                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 802                        spin_unlock(&dentry->d_lock);
 803                        continue;
 804                }
 805
 806                /*
 807                 * We found an inuse dentry which was not removed from
 808                 * the LRU because of laziness during lookup.  Do not free
 809                 * it - just keep it off the LRU list.
 810                 */
 811                if (dentry->d_count) {
 812                        dentry_lru_del(dentry);
 813                        spin_unlock(&dentry->d_lock);
 814                        continue;
 815                }
 816
 817                rcu_read_unlock();
 818
 819                try_prune_one_dentry(dentry);
 820
 821                rcu_read_lock();
 822        }
 823        rcu_read_unlock();
 824}
 825
 826/**
 827 * prune_dcache_sb - shrink the dcache
 828 * @sb: superblock
 829 * @count: number of entries to try to free
 830 *
 831 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 832 * done when we need more memory an called from the superblock shrinker
 833 * function.
 834 *
 835 * This function may fail to free any resources if all the dentries are in
 836 * use.
 837 */
 838void prune_dcache_sb(struct super_block *sb, int count)
 839{
 840        struct dentry *dentry;
 841        LIST_HEAD(referenced);
 842        LIST_HEAD(tmp);
 843
 844relock:
 845        spin_lock(&dcache_lru_lock);
 846        while (!list_empty(&sb->s_dentry_lru)) {
 847                dentry = list_entry(sb->s_dentry_lru.prev,
 848                                struct dentry, d_lru);
 849                BUG_ON(dentry->d_sb != sb);
 850
 851                if (!spin_trylock(&dentry->d_lock)) {
 852                        spin_unlock(&dcache_lru_lock);
 853                        cpu_relax();
 854                        goto relock;
 855                }
 856
 857                if (dentry->d_flags & DCACHE_REFERENCED) {
 858                        dentry->d_flags &= ~DCACHE_REFERENCED;
 859                        list_move(&dentry->d_lru, &referenced);
 860                        spin_unlock(&dentry->d_lock);
 861                } else {
 862                        list_move_tail(&dentry->d_lru, &tmp);
 863                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 864                        spin_unlock(&dentry->d_lock);
 865                        if (!--count)
 866                                break;
 867                }
 868                cond_resched_lock(&dcache_lru_lock);
 869        }
 870        if (!list_empty(&referenced))
 871                list_splice(&referenced, &sb->s_dentry_lru);
 872        spin_unlock(&dcache_lru_lock);
 873
 874        shrink_dentry_list(&tmp);
 875}
 876
 877/**
 878 * shrink_dcache_sb - shrink dcache for a superblock
 879 * @sb: superblock
 880 *
 881 * Shrink the dcache for the specified super block. This is used to free
 882 * the dcache before unmounting a file system.
 883 */
 884void shrink_dcache_sb(struct super_block *sb)
 885{
 886        LIST_HEAD(tmp);
 887
 888        spin_lock(&dcache_lru_lock);
 889        while (!list_empty(&sb->s_dentry_lru)) {
 890                list_splice_init(&sb->s_dentry_lru, &tmp);
 891                spin_unlock(&dcache_lru_lock);
 892                shrink_dentry_list(&tmp);
 893                spin_lock(&dcache_lru_lock);
 894        }
 895        spin_unlock(&dcache_lru_lock);
 896}
 897EXPORT_SYMBOL(shrink_dcache_sb);
 898
 899/*
 900 * destroy a single subtree of dentries for unmount
 901 * - see the comments on shrink_dcache_for_umount() for a description of the
 902 *   locking
 903 */
 904static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 905{
 906        struct dentry *parent;
 907
 908        BUG_ON(!IS_ROOT(dentry));
 909
 910        for (;;) {
 911                /* descend to the first leaf in the current subtree */
 912                while (!list_empty(&dentry->d_subdirs))
 913                        dentry = list_entry(dentry->d_subdirs.next,
 914                                            struct dentry, d_u.d_child);
 915
 916                /* consume the dentries from this leaf up through its parents
 917                 * until we find one with children or run out altogether */
 918                do {
 919                        struct inode *inode;
 920
 921                        /*
 922                         * remove the dentry from the lru, and inform
 923                         * the fs that this dentry is about to be
 924                         * unhashed and destroyed.
 925                         */
 926                        dentry_lru_prune(dentry);
 927                        __d_shrink(dentry);
 928
 929                        if (dentry->d_count != 0) {
 930                                printk(KERN_ERR
 931                                       "BUG: Dentry %p{i=%lx,n=%s}"
 932                                       " still in use (%d)"
 933                                       " [unmount of %s %s]\n",
 934                                       dentry,
 935                                       dentry->d_inode ?
 936                                       dentry->d_inode->i_ino : 0UL,
 937                                       dentry->d_name.name,
 938                                       dentry->d_count,
 939                                       dentry->d_sb->s_type->name,
 940                                       dentry->d_sb->s_id);
 941                                BUG();
 942                        }
 943
 944                        if (IS_ROOT(dentry)) {
 945                                parent = NULL;
 946                                list_del(&dentry->d_u.d_child);
 947                        } else {
 948                                parent = dentry->d_parent;
 949                                parent->d_count--;
 950                                list_del(&dentry->d_u.d_child);
 951                        }
 952
 953                        inode = dentry->d_inode;
 954                        if (inode) {
 955                                dentry->d_inode = NULL;
 956                                hlist_del_init(&dentry->d_alias);
 957                                if (dentry->d_op && dentry->d_op->d_iput)
 958                                        dentry->d_op->d_iput(dentry, inode);
 959                                else
 960                                        iput(inode);
 961                        }
 962
 963                        d_free(dentry);
 964
 965                        /* finished when we fall off the top of the tree,
 966                         * otherwise we ascend to the parent and move to the
 967                         * next sibling if there is one */
 968                        if (!parent)
 969                                return;
 970                        dentry = parent;
 971                } while (list_empty(&dentry->d_subdirs));
 972
 973                dentry = list_entry(dentry->d_subdirs.next,
 974                                    struct dentry, d_u.d_child);
 975        }
 976}
 977
 978/*
 979 * destroy the dentries attached to a superblock on unmounting
 980 * - we don't need to use dentry->d_lock because:
 981 *   - the superblock is detached from all mountings and open files, so the
 982 *     dentry trees will not be rearranged by the VFS
 983 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 984 *     any dentries belonging to this superblock that it comes across
 985 *   - the filesystem itself is no longer permitted to rearrange the dentries
 986 *     in this superblock
 987 */
 988void shrink_dcache_for_umount(struct super_block *sb)
 989{
 990        struct dentry *dentry;
 991
 992        if (down_read_trylock(&sb->s_umount))
 993                BUG();
 994
 995        dentry = sb->s_root;
 996        sb->s_root = NULL;
 997        dentry->d_count--;
 998        shrink_dcache_for_umount_subtree(dentry);
 999
1000        while (!hlist_bl_empty(&sb->s_anon)) {
1001                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1002                shrink_dcache_for_umount_subtree(dentry);
1003        }
1004}
1005
1006/*
1007 * This tries to ascend one level of parenthood, but
1008 * we can race with renaming, so we need to re-check
1009 * the parenthood after dropping the lock and check
1010 * that the sequence number still matches.
1011 */
1012static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1013{
1014        struct dentry *new = old->d_parent;
1015
1016        rcu_read_lock();
1017        spin_unlock(&old->d_lock);
1018        spin_lock(&new->d_lock);
1019
1020        /*
1021         * might go back up the wrong parent if we have had a rename
1022         * or deletion
1023         */
1024        if (new != old->d_parent ||
1025                 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1026                 (!locked && read_seqretry(&rename_lock, seq))) {
1027                spin_unlock(&new->d_lock);
1028                new = NULL;
1029        }
1030        rcu_read_unlock();
1031        return new;
1032}
1033
1034
1035/*
1036 * Search for at least 1 mount point in the dentry's subdirs.
1037 * We descend to the next level whenever the d_subdirs
1038 * list is non-empty and continue searching.
1039 */
1040 
1041/**
1042 * have_submounts - check for mounts over a dentry
1043 * @parent: dentry to check.
1044 *
1045 * Return true if the parent or its subdirectories contain
1046 * a mount point
1047 */
1048int have_submounts(struct dentry *parent)
1049{
1050        struct dentry *this_parent;
1051        struct list_head *next;
1052        unsigned seq;
1053        int locked = 0;
1054
1055        seq = read_seqbegin(&rename_lock);
1056again:
1057        this_parent = parent;
1058
1059        if (d_mountpoint(parent))
1060                goto positive;
1061        spin_lock(&this_parent->d_lock);
1062repeat:
1063        next = this_parent->d_subdirs.next;
1064resume:
1065        while (next != &this_parent->d_subdirs) {
1066                struct list_head *tmp = next;
1067                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1068                next = tmp->next;
1069
1070                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1071                /* Have we found a mount point ? */
1072                if (d_mountpoint(dentry)) {
1073                        spin_unlock(&dentry->d_lock);
1074                        spin_unlock(&this_parent->d_lock);
1075                        goto positive;
1076                }
1077                if (!list_empty(&dentry->d_subdirs)) {
1078                        spin_unlock(&this_parent->d_lock);
1079                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1080                        this_parent = dentry;
1081                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1082                        goto repeat;
1083                }
1084                spin_unlock(&dentry->d_lock);
1085        }
1086        /*
1087         * All done at this level ... ascend and resume the search.
1088         */
1089        if (this_parent != parent) {
1090                struct dentry *child = this_parent;
1091                this_parent = try_to_ascend(this_parent, locked, seq);
1092                if (!this_parent)
1093                        goto rename_retry;
1094                next = child->d_u.d_child.next;
1095                goto resume;
1096        }
1097        spin_unlock(&this_parent->d_lock);
1098        if (!locked && read_seqretry(&rename_lock, seq))
1099                goto rename_retry;
1100        if (locked)
1101                write_sequnlock(&rename_lock);
1102        return 0; /* No mount points found in tree */
1103positive:
1104        if (!locked && read_seqretry(&rename_lock, seq))
1105                goto rename_retry;
1106        if (locked)
1107                write_sequnlock(&rename_lock);
1108        return 1;
1109
1110rename_retry:
1111        if (locked)
1112                goto again;
1113        locked = 1;
1114        write_seqlock(&rename_lock);
1115        goto again;
1116}
1117EXPORT_SYMBOL(have_submounts);
1118
1119/*
1120 * Search the dentry child list of the specified parent,
1121 * and move any unused dentries to the end of the unused
1122 * list for prune_dcache(). We descend to the next level
1123 * whenever the d_subdirs list is non-empty and continue
1124 * searching.
1125 *
1126 * It returns zero iff there are no unused children,
1127 * otherwise  it returns the number of children moved to
1128 * the end of the unused list. This may not be the total
1129 * number of unused children, because select_parent can
1130 * drop the lock and return early due to latency
1131 * constraints.
1132 */
1133static int select_parent(struct dentry *parent, struct list_head *dispose)
1134{
1135        struct dentry *this_parent;
1136        struct list_head *next;
1137        unsigned seq;
1138        int found = 0;
1139        int locked = 0;
1140
1141        seq = read_seqbegin(&rename_lock);
1142again:
1143        this_parent = parent;
1144        spin_lock(&this_parent->d_lock);
1145repeat:
1146        next = this_parent->d_subdirs.next;
1147resume:
1148        while (next != &this_parent->d_subdirs) {
1149                struct list_head *tmp = next;
1150                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1151                next = tmp->next;
1152
1153                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1154
1155                /*
1156                 * move only zero ref count dentries to the dispose list.
1157                 *
1158                 * Those which are presently on the shrink list, being processed
1159                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1160                 * loop in shrink_dcache_parent() might not make any progress
1161                 * and loop forever.
1162                 */
1163                if (dentry->d_count) {
1164                        dentry_lru_del(dentry);
1165                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1166                        dentry_lru_move_list(dentry, dispose);
1167                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1168                        found++;
1169                }
1170                /*
1171                 * We can return to the caller if we have found some (this
1172                 * ensures forward progress). We'll be coming back to find
1173                 * the rest.
1174                 */
1175                if (found && need_resched()) {
1176                        spin_unlock(&dentry->d_lock);
1177                        goto out;
1178                }
1179
1180                /*
1181                 * Descend a level if the d_subdirs list is non-empty.
1182                 */
1183                if (!list_empty(&dentry->d_subdirs)) {
1184                        spin_unlock(&this_parent->d_lock);
1185                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1186                        this_parent = dentry;
1187                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1188                        goto repeat;
1189                }
1190
1191                spin_unlock(&dentry->d_lock);
1192        }
1193        /*
1194         * All done at this level ... ascend and resume the search.
1195         */
1196        if (this_parent != parent) {
1197                struct dentry *child = this_parent;
1198                this_parent = try_to_ascend(this_parent, locked, seq);
1199                if (!this_parent)
1200                        goto rename_retry;
1201                next = child->d_u.d_child.next;
1202                goto resume;
1203        }
1204out:
1205        spin_unlock(&this_parent->d_lock);
1206        if (!locked && read_seqretry(&rename_lock, seq))
1207                goto rename_retry;
1208        if (locked)
1209                write_sequnlock(&rename_lock);
1210        return found;
1211
1212rename_retry:
1213        if (found)
1214                return found;
1215        if (locked)
1216                goto again;
1217        locked = 1;
1218        write_seqlock(&rename_lock);
1219        goto again;
1220}
1221
1222/**
1223 * shrink_dcache_parent - prune dcache
1224 * @parent: parent of entries to prune
1225 *
1226 * Prune the dcache to remove unused children of the parent dentry.
1227 */
1228void shrink_dcache_parent(struct dentry * parent)
1229{
1230        LIST_HEAD(dispose);
1231        int found;
1232
1233        while ((found = select_parent(parent, &dispose)) != 0)
1234                shrink_dentry_list(&dispose);
1235}
1236EXPORT_SYMBOL(shrink_dcache_parent);
1237
1238/**
1239 * __d_alloc    -       allocate a dcache entry
1240 * @sb: filesystem it will belong to
1241 * @name: qstr of the name
1242 *
1243 * Allocates a dentry. It returns %NULL if there is insufficient memory
1244 * available. On a success the dentry is returned. The name passed in is
1245 * copied and the copy passed in may be reused after this call.
1246 */
1247 
1248struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1249{
1250        struct dentry *dentry;
1251        char *dname;
1252
1253        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1254        if (!dentry)
1255                return NULL;
1256
1257        /*
1258         * We guarantee that the inline name is always NUL-terminated.
1259         * This way the memcpy() done by the name switching in rename
1260         * will still always have a NUL at the end, even if we might
1261         * be overwriting an internal NUL character
1262         */
1263        dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1264        if (name->len > DNAME_INLINE_LEN-1) {
1265                dname = kmalloc(name->len + 1, GFP_KERNEL);
1266                if (!dname) {
1267                        kmem_cache_free(dentry_cache, dentry); 
1268                        return NULL;
1269                }
1270        } else  {
1271                dname = dentry->d_iname;
1272        }       
1273
1274        dentry->d_name.len = name->len;
1275        dentry->d_name.hash = name->hash;
1276        memcpy(dname, name->name, name->len);
1277        dname[name->len] = 0;
1278
1279        /* Make sure we always see the terminating NUL character */
1280        smp_wmb();
1281        dentry->d_name.name = dname;
1282
1283        dentry->d_count = 1;
1284        dentry->d_flags = 0;
1285        spin_lock_init(&dentry->d_lock);
1286        seqcount_init(&dentry->d_seq);
1287        dentry->d_inode = NULL;
1288        dentry->d_parent = dentry;
1289        dentry->d_sb = sb;
1290        dentry->d_op = NULL;
1291        dentry->d_fsdata = NULL;
1292        INIT_HLIST_BL_NODE(&dentry->d_hash);
1293        INIT_LIST_HEAD(&dentry->d_lru);
1294        INIT_LIST_HEAD(&dentry->d_subdirs);
1295        INIT_HLIST_NODE(&dentry->d_alias);
1296        INIT_LIST_HEAD(&dentry->d_u.d_child);
1297        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1298
1299        this_cpu_inc(nr_dentry);
1300
1301        return dentry;
1302}
1303
1304/**
1305 * d_alloc      -       allocate a dcache entry
1306 * @parent: parent of entry to allocate
1307 * @name: qstr of the name
1308 *
1309 * Allocates a dentry. It returns %NULL if there is insufficient memory
1310 * available. On a success the dentry is returned. The name passed in is
1311 * copied and the copy passed in may be reused after this call.
1312 */
1313struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1314{
1315        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1316        if (!dentry)
1317                return NULL;
1318
1319        spin_lock(&parent->d_lock);
1320        /*
1321         * don't need child lock because it is not subject
1322         * to concurrency here
1323         */
1324        __dget_dlock(parent);
1325        dentry->d_parent = parent;
1326        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1327        spin_unlock(&parent->d_lock);
1328
1329        return dentry;
1330}
1331EXPORT_SYMBOL(d_alloc);
1332
1333struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1334{
1335        struct dentry *dentry = __d_alloc(sb, name);
1336        if (dentry)
1337                dentry->d_flags |= DCACHE_DISCONNECTED;
1338        return dentry;
1339}
1340EXPORT_SYMBOL(d_alloc_pseudo);
1341
1342struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1343{
1344        struct qstr q;
1345
1346        q.name = name;
1347        q.len = strlen(name);
1348        q.hash = full_name_hash(q.name, q.len);
1349        return d_alloc(parent, &q);
1350}
1351EXPORT_SYMBOL(d_alloc_name);
1352
1353void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1354{
1355        WARN_ON_ONCE(dentry->d_op);
1356        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1357                                DCACHE_OP_COMPARE       |
1358                                DCACHE_OP_REVALIDATE    |
1359                                DCACHE_OP_WEAK_REVALIDATE       |
1360                                DCACHE_OP_DELETE ));
1361        dentry->d_op = op;
1362        if (!op)
1363                return;
1364        if (op->d_hash)
1365                dentry->d_flags |= DCACHE_OP_HASH;
1366        if (op->d_compare)
1367                dentry->d_flags |= DCACHE_OP_COMPARE;
1368        if (op->d_revalidate)
1369                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1370        if (op->d_weak_revalidate)
1371                dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1372        if (op->d_delete)
1373                dentry->d_flags |= DCACHE_OP_DELETE;
1374        if (op->d_prune)
1375                dentry->d_flags |= DCACHE_OP_PRUNE;
1376
1377}
1378EXPORT_SYMBOL(d_set_d_op);
1379
1380static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1381{
1382        spin_lock(&dentry->d_lock);
1383        if (inode) {
1384                if (unlikely(IS_AUTOMOUNT(inode)))
1385                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1386                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1387        }
1388        dentry->d_inode = inode;
1389        dentry_rcuwalk_barrier(dentry);
1390        spin_unlock(&dentry->d_lock);
1391        fsnotify_d_instantiate(dentry, inode);
1392}
1393
1394/**
1395 * d_instantiate - fill in inode information for a dentry
1396 * @entry: dentry to complete
1397 * @inode: inode to attach to this dentry
1398 *
1399 * Fill in inode information in the entry.
1400 *
1401 * This turns negative dentries into productive full members
1402 * of society.
1403 *
1404 * NOTE! This assumes that the inode count has been incremented
1405 * (or otherwise set) by the caller to indicate that it is now
1406 * in use by the dcache.
1407 */
1408 
1409void d_instantiate(struct dentry *entry, struct inode * inode)
1410{
1411        BUG_ON(!hlist_unhashed(&entry->d_alias));
1412        if (inode)
1413                spin_lock(&inode->i_lock);
1414        __d_instantiate(entry, inode);
1415        if (inode)
1416                spin_unlock(&inode->i_lock);
1417        security_d_instantiate(entry, inode);
1418}
1419EXPORT_SYMBOL(d_instantiate);
1420
1421/**
1422 * d_instantiate_unique - instantiate a non-aliased dentry
1423 * @entry: dentry to instantiate
1424 * @inode: inode to attach to this dentry
1425 *
1426 * Fill in inode information in the entry. On success, it returns NULL.
1427 * If an unhashed alias of "entry" already exists, then we return the
1428 * aliased dentry instead and drop one reference to inode.
1429 *
1430 * Note that in order to avoid conflicts with rename() etc, the caller
1431 * had better be holding the parent directory semaphore.
1432 *
1433 * This also assumes that the inode count has been incremented
1434 * (or otherwise set) by the caller to indicate that it is now
1435 * in use by the dcache.
1436 */
1437static struct dentry *__d_instantiate_unique(struct dentry *entry,
1438                                             struct inode *inode)
1439{
1440        struct dentry *alias;
1441        int len = entry->d_name.len;
1442        const char *name = entry->d_name.name;
1443        unsigned int hash = entry->d_name.hash;
1444
1445        if (!inode) {
1446                __d_instantiate(entry, NULL);
1447                return NULL;
1448        }
1449
1450        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
1451                /*
1452                 * Don't need alias->d_lock here, because aliases with
1453                 * d_parent == entry->d_parent are not subject to name or
1454                 * parent changes, because the parent inode i_mutex is held.
1455                 */
1456                if (alias->d_name.hash != hash)
1457                        continue;
1458                if (alias->d_parent != entry->d_parent)
1459                        continue;
1460                if (alias->d_name.len != len)
1461                        continue;
1462                if (dentry_cmp(alias, name, len))
1463                        continue;
1464                __dget(alias);
1465                return alias;
1466        }
1467
1468        __d_instantiate(entry, inode);
1469        return NULL;
1470}
1471
1472struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1473{
1474        struct dentry *result;
1475
1476        BUG_ON(!hlist_unhashed(&entry->d_alias));
1477
1478        if (inode)
1479                spin_lock(&inode->i_lock);
1480        result = __d_instantiate_unique(entry, inode);
1481        if (inode)
1482                spin_unlock(&inode->i_lock);
1483
1484        if (!result) {
1485                security_d_instantiate(entry, inode);
1486                return NULL;
1487        }
1488
1489        BUG_ON(!d_unhashed(result));
1490        iput(inode);
1491        return result;
1492}
1493
1494EXPORT_SYMBOL(d_instantiate_unique);
1495
1496struct dentry *d_make_root(struct inode *root_inode)
1497{
1498        struct dentry *res = NULL;
1499
1500        if (root_inode) {
1501                static const struct qstr name = QSTR_INIT("/", 1);
1502
1503                res = __d_alloc(root_inode->i_sb, &name);
1504                if (res)
1505                        d_instantiate(res, root_inode);
1506                else
1507                        iput(root_inode);
1508        }
1509        return res;
1510}
1511EXPORT_SYMBOL(d_make_root);
1512
1513static struct dentry * __d_find_any_alias(struct inode *inode)
1514{
1515        struct dentry *alias;
1516
1517        if (hlist_empty(&inode->i_dentry))
1518                return NULL;
1519        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
1520        __dget(alias);
1521        return alias;
1522}
1523
1524/**
1525 * d_find_any_alias - find any alias for a given inode
1526 * @inode: inode to find an alias for
1527 *
1528 * If any aliases exist for the given inode, take and return a
1529 * reference for one of them.  If no aliases exist, return %NULL.
1530 */
1531struct dentry *d_find_any_alias(struct inode *inode)
1532{
1533        struct dentry *de;
1534
1535        spin_lock(&inode->i_lock);
1536        de = __d_find_any_alias(inode);
1537        spin_unlock(&inode->i_lock);
1538        return de;
1539}
1540EXPORT_SYMBOL(d_find_any_alias);
1541
1542/**
1543 * d_obtain_alias - find or allocate a dentry for a given inode
1544 * @inode: inode to allocate the dentry for
1545 *
1546 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1547 * similar open by handle operations.  The returned dentry may be anonymous,
1548 * or may have a full name (if the inode was already in the cache).
1549 *
1550 * When called on a directory inode, we must ensure that the inode only ever
1551 * has one dentry.  If a dentry is found, that is returned instead of
1552 * allocating a new one.
1553 *
1554 * On successful return, the reference to the inode has been transferred
1555 * to the dentry.  In case of an error the reference on the inode is released.
1556 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1557 * be passed in and will be the error will be propagate to the return value,
1558 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1559 */
1560struct dentry *d_obtain_alias(struct inode *inode)
1561{
1562        static const struct qstr anonstring = QSTR_INIT("/", 1);
1563        struct dentry *tmp;
1564        struct dentry *res;
1565
1566        if (!inode)
1567                return ERR_PTR(-ESTALE);
1568        if (IS_ERR(inode))
1569                return ERR_CAST(inode);
1570
1571        res = d_find_any_alias(inode);
1572        if (res)
1573                goto out_iput;
1574
1575        tmp = __d_alloc(inode->i_sb, &anonstring);
1576        if (!tmp) {
1577                res = ERR_PTR(-ENOMEM);
1578                goto out_iput;
1579        }
1580
1581        spin_lock(&inode->i_lock);
1582        res = __d_find_any_alias(inode);
1583        if (res) {
1584                spin_unlock(&inode->i_lock);
1585                dput(tmp);
1586                goto out_iput;
1587        }
1588
1589        /* attach a disconnected dentry */
1590        spin_lock(&tmp->d_lock);
1591        tmp->d_inode = inode;
1592        tmp->d_flags |= DCACHE_DISCONNECTED;
1593        hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1594        hlist_bl_lock(&tmp->d_sb->s_anon);
1595        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1596        hlist_bl_unlock(&tmp->d_sb->s_anon);
1597        spin_unlock(&tmp->d_lock);
1598        spin_unlock(&inode->i_lock);
1599        security_d_instantiate(tmp, inode);
1600
1601        return tmp;
1602
1603 out_iput:
1604        if (res && !IS_ERR(res))
1605                security_d_instantiate(res, inode);
1606        iput(inode);
1607        return res;
1608}
1609EXPORT_SYMBOL(d_obtain_alias);
1610
1611/**
1612 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1613 * @inode:  the inode which may have a disconnected dentry
1614 * @dentry: a negative dentry which we want to point to the inode.
1615 *
1616 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1617 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1618 * and return it, else simply d_add the inode to the dentry and return NULL.
1619 *
1620 * This is needed in the lookup routine of any filesystem that is exportable
1621 * (via knfsd) so that we can build dcache paths to directories effectively.
1622 *
1623 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1624 * is returned.  This matches the expected return value of ->lookup.
1625 *
1626 */
1627struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1628{
1629        struct dentry *new = NULL;
1630
1631        if (IS_ERR(inode))
1632                return ERR_CAST(inode);
1633
1634        if (inode && S_ISDIR(inode->i_mode)) {
1635                spin_lock(&inode->i_lock);
1636                new = __d_find_alias(inode, 1);
1637                if (new) {
1638                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1639                        spin_unlock(&inode->i_lock);
1640                        security_d_instantiate(new, inode);
1641                        d_move(new, dentry);
1642                        iput(inode);
1643                } else {
1644                        /* already taking inode->i_lock, so d_add() by hand */
1645                        __d_instantiate(dentry, inode);
1646                        spin_unlock(&inode->i_lock);
1647                        security_d_instantiate(dentry, inode);
1648                        d_rehash(dentry);
1649                }
1650        } else
1651                d_add(dentry, inode);
1652        return new;
1653}
1654EXPORT_SYMBOL(d_splice_alias);
1655
1656/**
1657 * d_add_ci - lookup or allocate new dentry with case-exact name
1658 * @inode:  the inode case-insensitive lookup has found
1659 * @dentry: the negative dentry that was passed to the parent's lookup func
1660 * @name:   the case-exact name to be associated with the returned dentry
1661 *
1662 * This is to avoid filling the dcache with case-insensitive names to the
1663 * same inode, only the actual correct case is stored in the dcache for
1664 * case-insensitive filesystems.
1665 *
1666 * For a case-insensitive lookup match and if the the case-exact dentry
1667 * already exists in in the dcache, use it and return it.
1668 *
1669 * If no entry exists with the exact case name, allocate new dentry with
1670 * the exact case, and return the spliced entry.
1671 */
1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1673                        struct qstr *name)
1674{
1675        struct dentry *found;
1676        struct dentry *new;
1677
1678        /*
1679         * First check if a dentry matching the name already exists,
1680         * if not go ahead and create it now.
1681         */
1682        found = d_hash_and_lookup(dentry->d_parent, name);
1683        if (unlikely(IS_ERR(found)))
1684                goto err_out;
1685        if (!found) {
1686                new = d_alloc(dentry->d_parent, name);
1687                if (!new) {
1688                        found = ERR_PTR(-ENOMEM);
1689                        goto err_out;
1690                }
1691
1692                found = d_splice_alias(inode, new);
1693                if (found) {
1694                        dput(new);
1695                        return found;
1696                }
1697                return new;
1698        }
1699
1700        /*
1701         * If a matching dentry exists, and it's not negative use it.
1702         *
1703         * Decrement the reference count to balance the iget() done
1704         * earlier on.
1705         */
1706        if (found->d_inode) {
1707                if (unlikely(found->d_inode != inode)) {
1708                        /* This can't happen because bad inodes are unhashed. */
1709                        BUG_ON(!is_bad_inode(inode));
1710                        BUG_ON(!is_bad_inode(found->d_inode));
1711                }
1712                iput(inode);
1713                return found;
1714        }
1715
1716        /*
1717         * Negative dentry: instantiate it unless the inode is a directory and
1718         * already has a dentry.
1719         */
1720        new = d_splice_alias(inode, found);
1721        if (new) {
1722                dput(found);
1723                found = new;
1724        }
1725        return found;
1726
1727err_out:
1728        iput(inode);
1729        return found;
1730}
1731EXPORT_SYMBOL(d_add_ci);
1732
1733/*
1734 * Do the slow-case of the dentry name compare.
1735 *
1736 * Unlike the dentry_cmp() function, we need to atomically
1737 * load the name, length and inode information, so that the
1738 * filesystem can rely on them, and can use the 'name' and
1739 * 'len' information without worrying about walking off the
1740 * end of memory etc.
1741 *
1742 * Thus the read_seqcount_retry() and the "duplicate" info
1743 * in arguments (the low-level filesystem should not look
1744 * at the dentry inode or name contents directly, since
1745 * rename can change them while we're in RCU mode).
1746 */
1747enum slow_d_compare {
1748        D_COMP_OK,
1749        D_COMP_NOMATCH,
1750        D_COMP_SEQRETRY,
1751};
1752
1753static noinline enum slow_d_compare slow_dentry_cmp(
1754                const struct dentry *parent,
1755                struct inode *inode,
1756                struct dentry *dentry,
1757                unsigned int seq,
1758                const struct qstr *name)
1759{
1760        int tlen = dentry->d_name.len;
1761        const char *tname = dentry->d_name.name;
1762        struct inode *i = dentry->d_inode;
1763
1764        if (read_seqcount_retry(&dentry->d_seq, seq)) {
1765                cpu_relax();
1766                return D_COMP_SEQRETRY;
1767        }
1768        if (parent->d_op->d_compare(parent, inode,
1769                                dentry, i,
1770                                tlen, tname, name))
1771                return D_COMP_NOMATCH;
1772        return D_COMP_OK;
1773}
1774
1775/**
1776 * __d_lookup_rcu - search for a dentry (racy, store-free)
1777 * @parent: parent dentry
1778 * @name: qstr of name we wish to find
1779 * @seqp: returns d_seq value at the point where the dentry was found
1780 * @inode: returns dentry->d_inode when the inode was found valid.
1781 * Returns: dentry, or NULL
1782 *
1783 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1784 * resolution (store-free path walking) design described in
1785 * Documentation/filesystems/path-lookup.txt.
1786 *
1787 * This is not to be used outside core vfs.
1788 *
1789 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1790 * held, and rcu_read_lock held. The returned dentry must not be stored into
1791 * without taking d_lock and checking d_seq sequence count against @seq
1792 * returned here.
1793 *
1794 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1795 * function.
1796 *
1797 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1798 * the returned dentry, so long as its parent's seqlock is checked after the
1799 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1800 * is formed, giving integrity down the path walk.
1801 *
1802 * NOTE! The caller *has* to check the resulting dentry against the sequence
1803 * number we've returned before using any of the resulting dentry state!
1804 */
1805struct dentry *__d_lookup_rcu(const struct dentry *parent,
1806                                const struct qstr *name,
1807                                unsigned *seqp, struct inode *inode)
1808{
1809        u64 hashlen = name->hash_len;
1810        const unsigned char *str = name->name;
1811        struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1812        struct hlist_bl_node *node;
1813        struct dentry *dentry;
1814
1815        /*
1816         * Note: There is significant duplication with __d_lookup_rcu which is
1817         * required to prevent single threaded performance regressions
1818         * especially on architectures where smp_rmb (in seqcounts) are costly.
1819         * Keep the two functions in sync.
1820         */
1821
1822        /*
1823         * The hash list is protected using RCU.
1824         *
1825         * Carefully use d_seq when comparing a candidate dentry, to avoid
1826         * races with d_move().
1827         *
1828         * It is possible that concurrent renames can mess up our list
1829         * walk here and result in missing our dentry, resulting in the
1830         * false-negative result. d_lookup() protects against concurrent
1831         * renames using rename_lock seqlock.
1832         *
1833         * See Documentation/filesystems/path-lookup.txt for more details.
1834         */
1835        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1836                unsigned seq;
1837
1838seqretry:
1839                /*
1840                 * The dentry sequence count protects us from concurrent
1841                 * renames, and thus protects inode, parent and name fields.
1842                 *
1843                 * The caller must perform a seqcount check in order
1844                 * to do anything useful with the returned dentry,
1845                 * including using the 'd_inode' pointer.
1846                 *
1847                 * NOTE! We do a "raw" seqcount_begin here. That means that
1848                 * we don't wait for the sequence count to stabilize if it
1849                 * is in the middle of a sequence change. If we do the slow
1850                 * dentry compare, we will do seqretries until it is stable,
1851                 * and if we end up with a successful lookup, we actually
1852                 * want to exit RCU lookup anyway.
1853                 */
1854                seq = raw_seqcount_begin(&dentry->d_seq);
1855                if (dentry->d_parent != parent)
1856                        continue;
1857                if (d_unhashed(dentry))
1858                        continue;
1859                *seqp = seq;
1860
1861                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1862                        if (dentry->d_name.hash != hashlen_hash(hashlen))
1863                                continue;
1864                        switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
1865                        case D_COMP_OK:
1866                                return dentry;
1867                        case D_COMP_NOMATCH:
1868                                continue;
1869                        default:
1870                                goto seqretry;
1871                        }
1872                }
1873
1874                if (dentry->d_name.hash_len != hashlen)
1875                        continue;
1876                if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1877                        return dentry;
1878        }
1879        return NULL;
1880}
1881
1882/**
1883 * d_lookup - search for a dentry
1884 * @parent: parent dentry
1885 * @name: qstr of name we wish to find
1886 * Returns: dentry, or NULL
1887 *
1888 * d_lookup searches the children of the parent dentry for the name in
1889 * question. If the dentry is found its reference count is incremented and the
1890 * dentry is returned. The caller must use dput to free the entry when it has
1891 * finished using it. %NULL is returned if the dentry does not exist.
1892 */
1893struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1894{
1895        struct dentry *dentry;
1896        unsigned seq;
1897
1898        do {
1899                seq = read_seqbegin(&rename_lock);
1900                dentry = __d_lookup(parent, name);
1901                if (dentry)
1902                        break;
1903        } while (read_seqretry(&rename_lock, seq));
1904        return dentry;
1905}
1906EXPORT_SYMBOL(d_lookup);
1907
1908/**
1909 * __d_lookup - search for a dentry (racy)
1910 * @parent: parent dentry
1911 * @name: qstr of name we wish to find
1912 * Returns: dentry, or NULL
1913 *
1914 * __d_lookup is like d_lookup, however it may (rarely) return a
1915 * false-negative result due to unrelated rename activity.
1916 *
1917 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1918 * however it must be used carefully, eg. with a following d_lookup in
1919 * the case of failure.
1920 *
1921 * __d_lookup callers must be commented.
1922 */
1923struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1924{
1925        unsigned int len = name->len;
1926        unsigned int hash = name->hash;
1927        const unsigned char *str = name->name;
1928        struct hlist_bl_head *b = d_hash(parent, hash);
1929        struct hlist_bl_node *node;
1930        struct dentry *found = NULL;
1931        struct dentry *dentry;
1932
1933        /*
1934         * Note: There is significant duplication with __d_lookup_rcu which is
1935         * required to prevent single threaded performance regressions
1936         * especially on architectures where smp_rmb (in seqcounts) are costly.
1937         * Keep the two functions in sync.
1938         */
1939
1940        /*
1941         * The hash list is protected using RCU.
1942         *
1943         * Take d_lock when comparing a candidate dentry, to avoid races
1944         * with d_move().
1945         *
1946         * It is possible that concurrent renames can mess up our list
1947         * walk here and result in missing our dentry, resulting in the
1948         * false-negative result. d_lookup() protects against concurrent
1949         * renames using rename_lock seqlock.
1950         *
1951         * See Documentation/filesystems/path-lookup.txt for more details.
1952         */
1953        rcu_read_lock();
1954        
1955        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1956
1957                if (dentry->d_name.hash != hash)
1958                        continue;
1959
1960                spin_lock(&dentry->d_lock);
1961                if (dentry->d_parent != parent)
1962                        goto next;
1963                if (d_unhashed(dentry))
1964                        goto next;
1965
1966                /*
1967                 * It is safe to compare names since d_move() cannot
1968                 * change the qstr (protected by d_lock).
1969                 */
1970                if (parent->d_flags & DCACHE_OP_COMPARE) {
1971                        int tlen = dentry->d_name.len;
1972                        const char *tname = dentry->d_name.name;
1973                        if (parent->d_op->d_compare(parent, parent->d_inode,
1974                                                dentry, dentry->d_inode,
1975                                                tlen, tname, name))
1976                                goto next;
1977                } else {
1978                        if (dentry->d_name.len != len)
1979                                goto next;
1980                        if (dentry_cmp(dentry, str, len))
1981                                goto next;
1982                }
1983
1984                dentry->d_count++;
1985                found = dentry;
1986                spin_unlock(&dentry->d_lock);
1987                break;
1988next:
1989                spin_unlock(&dentry->d_lock);
1990        }
1991        rcu_read_unlock();
1992
1993        return found;
1994}
1995
1996/**
1997 * d_hash_and_lookup - hash the qstr then search for a dentry
1998 * @dir: Directory to search in
1999 * @name: qstr of name we wish to find
2000 *
2001 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2002 */
2003struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2004{
2005        /*
2006         * Check for a fs-specific hash function. Note that we must
2007         * calculate the standard hash first, as the d_op->d_hash()
2008         * routine may choose to leave the hash value unchanged.
2009         */
2010        name->hash = full_name_hash(name->name, name->len);
2011        if (dir->d_flags & DCACHE_OP_HASH) {
2012                int err = dir->d_op->d_hash(dir, dir->d_inode, name);
2013                if (unlikely(err < 0))
2014                        return ERR_PTR(err);
2015        }
2016        return d_lookup(dir, name);
2017}
2018EXPORT_SYMBOL(d_hash_and_lookup);
2019
2020/**
2021 * d_validate - verify dentry provided from insecure source (deprecated)
2022 * @dentry: The dentry alleged to be valid child of @dparent
2023 * @dparent: The parent dentry (known to be valid)
2024 *
2025 * An insecure source has sent us a dentry, here we verify it and dget() it.
2026 * This is used by ncpfs in its readdir implementation.
2027 * Zero is returned in the dentry is invalid.
2028 *
2029 * This function is slow for big directories, and deprecated, do not use it.
2030 */
2031int d_validate(struct dentry *dentry, struct dentry *dparent)
2032{
2033        struct dentry *child;
2034
2035        spin_lock(&dparent->d_lock);
2036        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2037                if (dentry == child) {
2038                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2039                        __dget_dlock(dentry);
2040                        spin_unlock(&dentry->d_lock);
2041                        spin_unlock(&dparent->d_lock);
2042                        return 1;
2043                }
2044        }
2045        spin_unlock(&dparent->d_lock);
2046
2047        return 0;
2048}
2049EXPORT_SYMBOL(d_validate);
2050
2051/*
2052 * When a file is deleted, we have two options:
2053 * - turn this dentry into a negative dentry
2054 * - unhash this dentry and free it.
2055 *
2056 * Usually, we want to just turn this into
2057 * a negative dentry, but if anybody else is
2058 * currently using the dentry or the inode
2059 * we can't do that and we fall back on removing
2060 * it from the hash queues and waiting for
2061 * it to be deleted later when it has no users
2062 */
2063 
2064/**
2065 * d_delete - delete a dentry
2066 * @dentry: The dentry to delete
2067 *
2068 * Turn the dentry into a negative dentry if possible, otherwise
2069 * remove it from the hash queues so it can be deleted later
2070 */
2071 
2072void d_delete(struct dentry * dentry)
2073{
2074        struct inode *inode;
2075        int isdir = 0;
2076        /*
2077         * Are we the only user?
2078         */
2079again:
2080        spin_lock(&dentry->d_lock);
2081        inode = dentry->d_inode;
2082        isdir = S_ISDIR(inode->i_mode);
2083        if (dentry->d_count == 1) {
2084                if (!spin_trylock(&inode->i_lock)) {
2085                        spin_unlock(&dentry->d_lock);
2086                        cpu_relax();
2087                        goto again;
2088                }
2089                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2090                dentry_unlink_inode(dentry);
2091                fsnotify_nameremove(dentry, isdir);
2092                return;
2093        }
2094
2095        if (!d_unhashed(dentry))
2096                __d_drop(dentry);
2097
2098        spin_unlock(&dentry->d_lock);
2099
2100        fsnotify_nameremove(dentry, isdir);
2101}
2102EXPORT_SYMBOL(d_delete);
2103
2104static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2105{
2106        BUG_ON(!d_unhashed(entry));
2107        hlist_bl_lock(b);
2108        entry->d_flags |= DCACHE_RCUACCESS;
2109        hlist_bl_add_head_rcu(&entry->d_hash, b);
2110        hlist_bl_unlock(b);
2111}
2112
2113static void _d_rehash(struct dentry * entry)
2114{
2115        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2116}
2117
2118/**
2119 * d_rehash     - add an entry back to the hash
2120 * @entry: dentry to add to the hash
2121 *
2122 * Adds a dentry to the hash according to its name.
2123 */
2124 
2125void d_rehash(struct dentry * entry)
2126{
2127        spin_lock(&entry->d_lock);
2128        _d_rehash(entry);
2129        spin_unlock(&entry->d_lock);
2130}
2131EXPORT_SYMBOL(d_rehash);
2132
2133/**
2134 * dentry_update_name_case - update case insensitive dentry with a new name
2135 * @dentry: dentry to be updated
2136 * @name: new name
2137 *
2138 * Update a case insensitive dentry with new case of name.
2139 *
2140 * dentry must have been returned by d_lookup with name @name. Old and new
2141 * name lengths must match (ie. no d_compare which allows mismatched name
2142 * lengths).
2143 *
2144 * Parent inode i_mutex must be held over d_lookup and into this call (to
2145 * keep renames and concurrent inserts, and readdir(2) away).
2146 */
2147void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2148{
2149        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2150        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2151
2152        spin_lock(&dentry->d_lock);
2153        write_seqcount_begin(&dentry->d_seq);
2154        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2155        write_seqcount_end(&dentry->d_seq);
2156        spin_unlock(&dentry->d_lock);
2157}
2158EXPORT_SYMBOL(dentry_update_name_case);
2159
2160static void switch_names(struct dentry *dentry, struct dentry *target)
2161{
2162        if (dname_external(target)) {
2163                if (dname_external(dentry)) {
2164                        /*
2165                         * Both external: swap the pointers
2166                         */
2167                        swap(target->d_name.name, dentry->d_name.name);
2168                } else {
2169                        /*
2170                         * dentry:internal, target:external.  Steal target's
2171                         * storage and make target internal.
2172                         */
2173                        memcpy(target->d_iname, dentry->d_name.name,
2174                                        dentry->d_name.len + 1);
2175                        dentry->d_name.name = target->d_name.name;
2176                        target->d_name.name = target->d_iname;
2177                }
2178        } else {
2179                if (dname_external(dentry)) {
2180                        /*
2181                         * dentry:external, target:internal.  Give dentry's
2182                         * storage to target and make dentry internal
2183                         */
2184                        memcpy(dentry->d_iname, target->d_name.name,
2185                                        target->d_name.len + 1);
2186                        target->d_name.name = dentry->d_name.name;
2187                        dentry->d_name.name = dentry->d_iname;
2188                } else {
2189                        /*
2190                         * Both are internal.  Just copy target to dentry
2191                         */
2192                        memcpy(dentry->d_iname, target->d_name.name,
2193                                        target->d_name.len + 1);
2194                        dentry->d_name.len = target->d_name.len;
2195                        return;
2196                }
2197        }
2198        swap(dentry->d_name.len, target->d_name.len);
2199}
2200
2201static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2202{
2203        /*
2204         * XXXX: do we really need to take target->d_lock?
2205         */
2206        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2207                spin_lock(&target->d_parent->d_lock);
2208        else {
2209                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2210                        spin_lock(&dentry->d_parent->d_lock);
2211                        spin_lock_nested(&target->d_parent->d_lock,
2212                                                DENTRY_D_LOCK_NESTED);
2213                } else {
2214                        spin_lock(&target->d_parent->d_lock);
2215                        spin_lock_nested(&dentry->d_parent->d_lock,
2216                                                DENTRY_D_LOCK_NESTED);
2217                }
2218        }
2219        if (target < dentry) {
2220                spin_lock_nested(&target->d_lock, 2);
2221                spin_lock_nested(&dentry->d_lock, 3);
2222        } else {
2223                spin_lock_nested(&dentry->d_lock, 2);
2224                spin_lock_nested(&target->d_lock, 3);
2225        }
2226}
2227
2228static void dentry_unlock_parents_for_move(struct dentry *dentry,
2229                                        struct dentry *target)
2230{
2231        if (target->d_parent != dentry->d_parent)
2232                spin_unlock(&dentry->d_parent->d_lock);
2233        if (target->d_parent != target)
2234                spin_unlock(&target->d_parent->d_lock);
2235}
2236
2237/*
2238 * When switching names, the actual string doesn't strictly have to
2239 * be preserved in the target - because we're dropping the target
2240 * anyway. As such, we can just do a simple memcpy() to copy over
2241 * the new name before we switch.
2242 *
2243 * Note that we have to be a lot more careful about getting the hash
2244 * switched - we have to switch the hash value properly even if it
2245 * then no longer matches the actual (corrupted) string of the target.
2246 * The hash value has to match the hash queue that the dentry is on..
2247 */
2248/*
2249 * __d_move - move a dentry
2250 * @dentry: entry to move
2251 * @target: new dentry
2252 *
2253 * Update the dcache to reflect the move of a file name. Negative
2254 * dcache entries should not be moved in this way. Caller must hold
2255 * rename_lock, the i_mutex of the source and target directories,
2256 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2257 */
2258static void __d_move(struct dentry * dentry, struct dentry * target)
2259{
2260        if (!dentry->d_inode)
2261                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2262
2263        BUG_ON(d_ancestor(dentry, target));
2264        BUG_ON(d_ancestor(target, dentry));
2265
2266        dentry_lock_for_move(dentry, target);
2267
2268        write_seqcount_begin(&dentry->d_seq);
2269        write_seqcount_begin(&target->d_seq);
2270
2271        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2272
2273        /*
2274         * Move the dentry to the target hash queue. Don't bother checking
2275         * for the same hash queue because of how unlikely it is.
2276         */
2277        __d_drop(dentry);
2278        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2279
2280        /* Unhash the target: dput() will then get rid of it */
2281        __d_drop(target);
2282
2283        list_del(&dentry->d_u.d_child);
2284        list_del(&target->d_u.d_child);
2285
2286        /* Switch the names.. */
2287        switch_names(dentry, target);
2288        swap(dentry->d_name.hash, target->d_name.hash);
2289
2290        /* ... and switch the parents */
2291        if (IS_ROOT(dentry)) {
2292                dentry->d_parent = target->d_parent;
2293                target->d_parent = target;
2294                INIT_LIST_HEAD(&target->d_u.d_child);
2295        } else {
2296                swap(dentry->d_parent, target->d_parent);
2297
2298                /* And add them back to the (new) parent lists */
2299                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2300        }
2301
2302        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2303
2304        write_seqcount_end(&target->d_seq);
2305        write_seqcount_end(&dentry->d_seq);
2306
2307        dentry_unlock_parents_for_move(dentry, target);
2308        spin_unlock(&target->d_lock);
2309        fsnotify_d_move(dentry);
2310        spin_unlock(&dentry->d_lock);
2311}
2312
2313/*
2314 * d_move - move a dentry
2315 * @dentry: entry to move
2316 * @target: new dentry
2317 *
2318 * Update the dcache to reflect the move of a file name. Negative
2319 * dcache entries should not be moved in this way. See the locking
2320 * requirements for __d_move.
2321 */
2322void d_move(struct dentry *dentry, struct dentry *target)
2323{
2324        write_seqlock(&rename_lock);
2325        __d_move(dentry, target);
2326        write_sequnlock(&rename_lock);
2327}
2328EXPORT_SYMBOL(d_move);
2329
2330/**
2331 * d_ancestor - search for an ancestor
2332 * @p1: ancestor dentry
2333 * @p2: child dentry
2334 *
2335 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2336 * an ancestor of p2, else NULL.
2337 */
2338struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2339{
2340        struct dentry *p;
2341
2342        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2343                if (p->d_parent == p1)
2344                        return p;
2345        }
2346        return NULL;
2347}
2348
2349/*
2350 * This helper attempts to cope with remotely renamed directories
2351 *
2352 * It assumes that the caller is already holding
2353 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2354 *
2355 * Note: If ever the locking in lock_rename() changes, then please
2356 * remember to update this too...
2357 */
2358static struct dentry *__d_unalias(struct inode *inode,
2359                struct dentry *dentry, struct dentry *alias)
2360{
2361        struct mutex *m1 = NULL, *m2 = NULL;
2362        struct dentry *ret = ERR_PTR(-EBUSY);
2363
2364        /* If alias and dentry share a parent, then no extra locks required */
2365        if (alias->d_parent == dentry->d_parent)
2366                goto out_unalias;
2367
2368        /* See lock_rename() */
2369        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2370                goto out_err;
2371        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2372        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2373                goto out_err;
2374        m2 = &alias->d_parent->d_inode->i_mutex;
2375out_unalias:
2376        if (likely(!d_mountpoint(alias))) {
2377                __d_move(alias, dentry);
2378                ret = alias;
2379        }
2380out_err:
2381        spin_unlock(&inode->i_lock);
2382        if (m2)
2383                mutex_unlock(m2);
2384        if (m1)
2385                mutex_unlock(m1);
2386        return ret;
2387}
2388
2389/*
2390 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2391 * named dentry in place of the dentry to be replaced.
2392 * returns with anon->d_lock held!
2393 */
2394static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2395{
2396        struct dentry *dparent;
2397
2398        dentry_lock_for_move(anon, dentry);
2399
2400        write_seqcount_begin(&dentry->d_seq);
2401        write_seqcount_begin(&anon->d_seq);
2402
2403        dparent = dentry->d_parent;
2404
2405        switch_names(dentry, anon);
2406        swap(dentry->d_name.hash, anon->d_name.hash);
2407
2408        dentry->d_parent = dentry;
2409        list_del_init(&dentry->d_u.d_child);
2410        anon->d_parent = dparent;
2411        list_del(&anon->d_u.d_child);
2412        list_add(&anon->d_u.d_child, &dparent->d_subdirs);
2413
2414        write_seqcount_end(&dentry->d_seq);
2415        write_seqcount_end(&anon->d_seq);
2416
2417        dentry_unlock_parents_for_move(anon, dentry);
2418        spin_unlock(&dentry->d_lock);
2419
2420        /* anon->d_lock still locked, returns locked */
2421        anon->d_flags &= ~DCACHE_DISCONNECTED;
2422}
2423
2424/**
2425 * d_materialise_unique - introduce an inode into the tree
2426 * @dentry: candidate dentry
2427 * @inode: inode to bind to the dentry, to which aliases may be attached
2428 *
2429 * Introduces an dentry into the tree, substituting an extant disconnected
2430 * root directory alias in its place if there is one. Caller must hold the
2431 * i_mutex of the parent directory.
2432 */
2433struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2434{
2435        struct dentry *actual;
2436
2437        BUG_ON(!d_unhashed(dentry));
2438
2439        if (!inode) {
2440                actual = dentry;
2441                __d_instantiate(dentry, NULL);
2442                d_rehash(actual);
2443                goto out_nolock;
2444        }
2445
2446        spin_lock(&inode->i_lock);
2447
2448        if (S_ISDIR(inode->i_mode)) {
2449                struct dentry *alias;
2450
2451                /* Does an aliased dentry already exist? */
2452                alias = __d_find_alias(inode, 0);
2453                if (alias) {
2454                        actual = alias;
2455                        write_seqlock(&rename_lock);
2456
2457                        if (d_ancestor(alias, dentry)) {
2458                                /* Check for loops */
2459                                actual = ERR_PTR(-ELOOP);
2460                                spin_unlock(&inode->i_lock);
2461                        } else if (IS_ROOT(alias)) {
2462                                /* Is this an anonymous mountpoint that we
2463                                 * could splice into our tree? */
2464                                __d_materialise_dentry(dentry, alias);
2465                                write_sequnlock(&rename_lock);
2466                                __d_drop(alias);
2467                                goto found;
2468                        } else {
2469                                /* Nope, but we must(!) avoid directory
2470                                 * aliasing. This drops inode->i_lock */
2471                                actual = __d_unalias(inode, dentry, alias);
2472                        }
2473                        write_sequnlock(&rename_lock);
2474                        if (IS_ERR(actual)) {
2475                                if (PTR_ERR(actual) == -ELOOP)
2476                                        pr_warn_ratelimited(
2477                                                "VFS: Lookup of '%s' in %s %s"
2478                                                " would have caused loop\n",
2479                                                dentry->d_name.name,
2480                                                inode->i_sb->s_type->name,
2481                                                inode->i_sb->s_id);
2482                                dput(alias);
2483                        }
2484                        goto out_nolock;
2485                }
2486        }
2487
2488        /* Add a unique reference */
2489        actual = __d_instantiate_unique(dentry, inode);
2490        if (!actual)
2491                actual = dentry;
2492        else
2493                BUG_ON(!d_unhashed(actual));
2494
2495        spin_lock(&actual->d_lock);
2496found:
2497        _d_rehash(actual);
2498        spin_unlock(&actual->d_lock);
2499        spin_unlock(&inode->i_lock);
2500out_nolock:
2501        if (actual == dentry) {
2502                security_d_instantiate(dentry, inode);
2503                return NULL;
2504        }
2505
2506        iput(inode);
2507        return actual;
2508}
2509EXPORT_SYMBOL_GPL(d_materialise_unique);
2510
2511static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2512{
2513        *buflen -= namelen;
2514        if (*buflen < 0)
2515                return -ENAMETOOLONG;
2516        *buffer -= namelen;
2517        memcpy(*buffer, str, namelen);
2518        return 0;
2519}
2520
2521static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2522{
2523        return prepend(buffer, buflen, name->name, name->len);
2524}
2525
2526/**
2527 * prepend_path - Prepend path string to a buffer
2528 * @path: the dentry/vfsmount to report
2529 * @root: root vfsmnt/dentry
2530 * @buffer: pointer to the end of the buffer
2531 * @buflen: pointer to buffer length
2532 *
2533 * Caller holds the rename_lock.
2534 */
2535static int prepend_path(const struct path *path,
2536                        const struct path *root,
2537                        char **buffer, int *buflen)
2538{
2539        struct dentry *dentry = path->dentry;
2540        struct vfsmount *vfsmnt = path->mnt;
2541        struct mount *mnt = real_mount(vfsmnt);
2542        bool slash = false;
2543        int error = 0;
2544
2545        while (dentry != root->dentry || vfsmnt != root->mnt) {
2546                struct dentry * parent;
2547
2548                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2549                        /* Global root? */
2550                        if (!mnt_has_parent(mnt))
2551                                goto global_root;
2552                        dentry = mnt->mnt_mountpoint;
2553                        mnt = mnt->mnt_parent;
2554                        vfsmnt = &mnt->mnt;
2555                        continue;
2556                }
2557                parent = dentry->d_parent;
2558                prefetch(parent);
2559                spin_lock(&dentry->d_lock);
2560                error = prepend_name(buffer, buflen, &dentry->d_name);
2561                spin_unlock(&dentry->d_lock);
2562                if (!error)
2563                        error = prepend(buffer, buflen, "/", 1);
2564                if (error)
2565                        break;
2566
2567                slash = true;
2568                dentry = parent;
2569        }
2570
2571        if (!error && !slash)
2572                error = prepend(buffer, buflen, "/", 1);
2573
2574        return error;
2575
2576global_root:
2577        /*
2578         * Filesystems needing to implement special "root names"
2579         * should do so with ->d_dname()
2580         */
2581        if (IS_ROOT(dentry) &&
2582            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2583                WARN(1, "Root dentry has weird name <%.*s>\n",
2584                     (int) dentry->d_name.len, dentry->d_name.name);
2585        }
2586        if (!slash)
2587                error = prepend(buffer, buflen, "/", 1);
2588        if (!error)
2589                error = is_mounted(vfsmnt) ? 1 : 2;
2590        return error;
2591}
2592
2593/**
2594 * __d_path - return the path of a dentry
2595 * @path: the dentry/vfsmount to report
2596 * @root: root vfsmnt/dentry
2597 * @buf: buffer to return value in
2598 * @buflen: buffer length
2599 *
2600 * Convert a dentry into an ASCII path name.
2601 *
2602 * Returns a pointer into the buffer or an error code if the
2603 * path was too long.
2604 *
2605 * "buflen" should be positive.
2606 *
2607 * If the path is not reachable from the supplied root, return %NULL.
2608 */
2609char *__d_path(const struct path *path,
2610               const struct path *root,
2611               char *buf, int buflen)
2612{
2613        char *res = buf + buflen;
2614        int error;
2615
2616        prepend(&res, &buflen, "\0", 1);
2617        br_read_lock(&vfsmount_lock);
2618        write_seqlock(&rename_lock);
2619        error = prepend_path(path, root, &res, &buflen);
2620        write_sequnlock(&rename_lock);
2621        br_read_unlock(&vfsmount_lock);
2622
2623        if (error < 0)
2624                return ERR_PTR(error);
2625        if (error > 0)
2626                return NULL;
2627        return res;
2628}
2629
2630char *d_absolute_path(const struct path *path,
2631               char *buf, int buflen)
2632{
2633        struct path root = {};
2634        char *res = buf + buflen;
2635        int error;
2636
2637        prepend(&res, &buflen, "\0", 1);
2638        br_read_lock(&vfsmount_lock);
2639        write_seqlock(&rename_lock);
2640        error = prepend_path(path, &root, &res, &buflen);
2641        write_sequnlock(&rename_lock);
2642        br_read_unlock(&vfsmount_lock);
2643
2644        if (error > 1)
2645                error = -EINVAL;
2646        if (error < 0)
2647                return ERR_PTR(error);
2648        return res;
2649}
2650
2651/*
2652 * same as __d_path but appends "(deleted)" for unlinked files.
2653 */
2654static int path_with_deleted(const struct path *path,
2655                             const struct path *root,
2656                             char **buf, int *buflen)
2657{
2658        prepend(buf, buflen, "\0", 1);
2659        if (d_unlinked(path->dentry)) {
2660                int error = prepend(buf, buflen, " (deleted)", 10);
2661                if (error)
2662                        return error;
2663        }
2664
2665        return prepend_path(path, root, buf, buflen);
2666}
2667
2668static int prepend_unreachable(char **buffer, int *buflen)
2669{
2670        return prepend(buffer, buflen, "(unreachable)", 13);
2671}
2672
2673/**
2674 * d_path - return the path of a dentry
2675 * @path: path to report
2676 * @buf: buffer to return value in
2677 * @buflen: buffer length
2678 *
2679 * Convert a dentry into an ASCII path name. If the entry has been deleted
2680 * the string " (deleted)" is appended. Note that this is ambiguous.
2681 *
2682 * Returns a pointer into the buffer or an error code if the path was
2683 * too long. Note: Callers should use the returned pointer, not the passed
2684 * in buffer, to use the name! The implementation often starts at an offset
2685 * into the buffer, and may leave 0 bytes at the start.
2686 *
2687 * "buflen" should be positive.
2688 */
2689char *d_path(const struct path *path, char *buf, int buflen)
2690{
2691        char *res = buf + buflen;
2692        struct path root;
2693        int error;
2694
2695        /*
2696         * We have various synthetic filesystems that never get mounted.  On
2697         * these filesystems dentries are never used for lookup purposes, and
2698         * thus don't need to be hashed.  They also don't need a name until a
2699         * user wants to identify the object in /proc/pid/fd/.  The little hack
2700         * below allows us to generate a name for these objects on demand:
2701         */
2702        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2703                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2704
2705        get_fs_root(current->fs, &root);
2706        br_read_lock(&vfsmount_lock);
2707        write_seqlock(&rename_lock);
2708        error = path_with_deleted(path, &root, &res, &buflen);
2709        write_sequnlock(&rename_lock);
2710        br_read_unlock(&vfsmount_lock);
2711        if (error < 0)
2712                res = ERR_PTR(error);
2713        path_put(&root);
2714        return res;
2715}
2716EXPORT_SYMBOL(d_path);
2717
2718/*
2719 * Helper function for dentry_operations.d_dname() members
2720 */
2721char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2722                        const char *fmt, ...)
2723{
2724        va_list args;
2725        char temp[64];
2726        int sz;
2727
2728        va_start(args, fmt);
2729        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2730        va_end(args);
2731
2732        if (sz > sizeof(temp) || sz > buflen)
2733                return ERR_PTR(-ENAMETOOLONG);
2734
2735        buffer += buflen - sz;
2736        return memcpy(buffer, temp, sz);
2737}
2738
2739/*
2740 * Write full pathname from the root of the filesystem into the buffer.
2741 */
2742static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2743{
2744        char *end = buf + buflen;
2745        char *retval;
2746
2747        prepend(&end, &buflen, "\0", 1);
2748        if (buflen < 1)
2749                goto Elong;
2750        /* Get '/' right */
2751        retval = end-1;
2752        *retval = '/';
2753
2754        while (!IS_ROOT(dentry)) {
2755                struct dentry *parent = dentry->d_parent;
2756                int error;
2757
2758                prefetch(parent);
2759                spin_lock(&dentry->d_lock);
2760                error = prepend_name(&end, &buflen, &dentry->d_name);
2761                spin_unlock(&dentry->d_lock);
2762                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2763                        goto Elong;
2764
2765                retval = end;
2766                dentry = parent;
2767        }
2768        return retval;
2769Elong:
2770        return ERR_PTR(-ENAMETOOLONG);
2771}
2772
2773char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2774{
2775        char *retval;
2776
2777        write_seqlock(&rename_lock);
2778        retval = __dentry_path(dentry, buf, buflen);
2779        write_sequnlock(&rename_lock);
2780
2781        return retval;
2782}
2783EXPORT_SYMBOL(dentry_path_raw);
2784
2785char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2786{
2787        char *p = NULL;
2788        char *retval;
2789
2790        write_seqlock(&rename_lock);
2791        if (d_unlinked(dentry)) {
2792                p = buf + buflen;
2793                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2794                        goto Elong;
2795                buflen++;
2796        }
2797        retval = __dentry_path(dentry, buf, buflen);
2798        write_sequnlock(&rename_lock);
2799        if (!IS_ERR(retval) && p)
2800                *p = '/';       /* restore '/' overriden with '\0' */
2801        return retval;
2802Elong:
2803        return ERR_PTR(-ENAMETOOLONG);
2804}
2805
2806/*
2807 * NOTE! The user-level library version returns a
2808 * character pointer. The kernel system call just
2809 * returns the length of the buffer filled (which
2810 * includes the ending '\0' character), or a negative
2811 * error value. So libc would do something like
2812 *
2813 *      char *getcwd(char * buf, size_t size)
2814 *      {
2815 *              int retval;
2816 *
2817 *              retval = sys_getcwd(buf, size);
2818 *              if (retval >= 0)
2819 *                      return buf;
2820 *              errno = -retval;
2821 *              return NULL;
2822 *      }
2823 */
2824SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2825{
2826        int error;
2827        struct path pwd, root;
2828        char *page = (char *) __get_free_page(GFP_USER);
2829
2830        if (!page)
2831                return -ENOMEM;
2832
2833        get_fs_root_and_pwd(current->fs, &root, &pwd);
2834
2835        error = -ENOENT;
2836        br_read_lock(&vfsmount_lock);
2837        write_seqlock(&rename_lock);
2838        if (!d_unlinked(pwd.dentry)) {
2839                unsigned long len;
2840                char *cwd = page + PAGE_SIZE;
2841                int buflen = PAGE_SIZE;
2842
2843                prepend(&cwd, &buflen, "\0", 1);
2844                error = prepend_path(&pwd, &root, &cwd, &buflen);
2845                write_sequnlock(&rename_lock);
2846                br_read_unlock(&vfsmount_lock);
2847
2848                if (error < 0)
2849                        goto out;
2850
2851                /* Unreachable from current root */
2852                if (error > 0) {
2853                        error = prepend_unreachable(&cwd, &buflen);
2854                        if (error)
2855                                goto out;
2856                }
2857
2858                error = -ERANGE;
2859                len = PAGE_SIZE + page - cwd;
2860                if (len <= size) {
2861                        error = len;
2862                        if (copy_to_user(buf, cwd, len))
2863                                error = -EFAULT;
2864                }
2865        } else {
2866                write_sequnlock(&rename_lock);
2867                br_read_unlock(&vfsmount_lock);
2868        }
2869
2870out:
2871        path_put(&pwd);
2872        path_put(&root);
2873        free_page((unsigned long) page);
2874        return error;
2875}
2876
2877/*
2878 * Test whether new_dentry is a subdirectory of old_dentry.
2879 *
2880 * Trivially implemented using the dcache structure
2881 */
2882
2883/**
2884 * is_subdir - is new dentry a subdirectory of old_dentry
2885 * @new_dentry: new dentry
2886 * @old_dentry: old dentry
2887 *
2888 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2889 * Returns 0 otherwise.
2890 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2891 */
2892  
2893int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2894{
2895        int result;
2896        unsigned seq;
2897
2898        if (new_dentry == old_dentry)
2899                return 1;
2900
2901        do {
2902                /* for restarting inner loop in case of seq retry */
2903                seq = read_seqbegin(&rename_lock);
2904                /*
2905                 * Need rcu_readlock to protect against the d_parent trashing
2906                 * due to d_move
2907                 */
2908                rcu_read_lock();
2909                if (d_ancestor(old_dentry, new_dentry))
2910                        result = 1;
2911                else
2912                        result = 0;
2913                rcu_read_unlock();
2914        } while (read_seqretry(&rename_lock, seq));
2915
2916        return result;
2917}
2918
2919void d_genocide(struct dentry *root)
2920{
2921        struct dentry *this_parent;
2922        struct list_head *next;
2923        unsigned seq;
2924        int locked = 0;
2925
2926        seq = read_seqbegin(&rename_lock);
2927again:
2928        this_parent = root;
2929        spin_lock(&this_parent->d_lock);
2930repeat:
2931        next = this_parent->d_subdirs.next;
2932resume:
2933        while (next != &this_parent->d_subdirs) {
2934                struct list_head *tmp = next;
2935                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2936                next = tmp->next;
2937
2938                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2939                if (d_unhashed(dentry) || !dentry->d_inode) {
2940                        spin_unlock(&dentry->d_lock);
2941                        continue;
2942                }
2943                if (!list_empty(&dentry->d_subdirs)) {
2944                        spin_unlock(&this_parent->d_lock);
2945                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2946                        this_parent = dentry;
2947                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2948                        goto repeat;
2949                }
2950                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2951                        dentry->d_flags |= DCACHE_GENOCIDE;
2952                        dentry->d_count--;
2953                }
2954                spin_unlock(&dentry->d_lock);
2955        }
2956        if (this_parent != root) {
2957                struct dentry *child = this_parent;
2958                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2959                        this_parent->d_flags |= DCACHE_GENOCIDE;
2960                        this_parent->d_count--;
2961                }
2962                this_parent = try_to_ascend(this_parent, locked, seq);
2963                if (!this_parent)
2964                        goto rename_retry;
2965                next = child->d_u.d_child.next;
2966                goto resume;
2967        }
2968        spin_unlock(&this_parent->d_lock);
2969        if (!locked && read_seqretry(&rename_lock, seq))
2970                goto rename_retry;
2971        if (locked)
2972                write_sequnlock(&rename_lock);
2973        return;
2974
2975rename_retry:
2976        if (locked)
2977                goto again;
2978        locked = 1;
2979        write_seqlock(&rename_lock);
2980        goto again;
2981}
2982
2983/**
2984 * find_inode_number - check for dentry with name
2985 * @dir: directory to check
2986 * @name: Name to find.
2987 *
2988 * Check whether a dentry already exists for the given name,
2989 * and return the inode number if it has an inode. Otherwise
2990 * 0 is returned.
2991 *
2992 * This routine is used to post-process directory listings for
2993 * filesystems using synthetic inode numbers, and is necessary
2994 * to keep getcwd() working.
2995 */
2996 
2997ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2998{
2999        struct dentry * dentry;
3000        ino_t ino = 0;
3001
3002        dentry = d_hash_and_lookup(dir, name);
3003        if (!IS_ERR_OR_NULL(dentry)) {
3004                if (dentry->d_inode)
3005                        ino = dentry->d_inode->i_ino;
3006                dput(dentry);
3007        }
3008        return ino;
3009}
3010EXPORT_SYMBOL(find_inode_number);
3011
3012static __initdata unsigned long dhash_entries;
3013static int __init set_dhash_entries(char *str)
3014{
3015        if (!str)
3016                return 0;
3017        dhash_entries = simple_strtoul(str, &str, 0);
3018        return 1;
3019}
3020__setup("dhash_entries=", set_dhash_entries);
3021
3022static void __init dcache_init_early(void)
3023{
3024        unsigned int loop;
3025
3026        /* If hashes are distributed across NUMA nodes, defer
3027         * hash allocation until vmalloc space is available.
3028         */
3029        if (hashdist)
3030                return;
3031
3032        dentry_hashtable =
3033                alloc_large_system_hash("Dentry cache",
3034                                        sizeof(struct hlist_bl_head),
3035                                        dhash_entries,
3036                                        13,
3037                                        HASH_EARLY,
3038                                        &d_hash_shift,
3039                                        &d_hash_mask,
3040                                        0,
3041                                        0);
3042
3043        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3044                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3045}
3046
3047static void __init dcache_init(void)
3048{
3049        unsigned int loop;
3050
3051        /* 
3052         * A constructor could be added for stable state like the lists,
3053         * but it is probably not worth it because of the cache nature
3054         * of the dcache. 
3055         */
3056        dentry_cache = KMEM_CACHE(dentry,
3057                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3058
3059        /* Hash may have been set up in dcache_init_early */
3060        if (!hashdist)
3061                return;
3062
3063        dentry_hashtable =
3064                alloc_large_system_hash("Dentry cache",
3065                                        sizeof(struct hlist_bl_head),
3066                                        dhash_entries,
3067                                        13,
3068                                        0,
3069                                        &d_hash_shift,
3070                                        &d_hash_mask,
3071                                        0,
3072                                        0);
3073
3074        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3075                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3076}
3077
3078/* SLAB cache for __getname() consumers */
3079struct kmem_cache *names_cachep __read_mostly;
3080EXPORT_SYMBOL(names_cachep);
3081
3082EXPORT_SYMBOL(d_genocide);
3083
3084void __init vfs_caches_init_early(void)
3085{
3086        dcache_init_early();
3087        inode_init_early();
3088}
3089
3090void __init vfs_caches_init(unsigned long mempages)
3091{
3092        unsigned long reserve;
3093
3094        /* Base hash sizes on available memory, with a reserve equal to
3095           150% of current kernel size */
3096
3097        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3098        mempages -= reserve;
3099
3100        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3101                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3102
3103        dcache_init();
3104        inode_init();
3105        files_init(mempages);
3106        mnt_init();
3107        bdev_cache_init();
3108        chrdev_init();
3109}
3110
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.