linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41
  42/*
  43 * Usage:
  44 * dcache->d_inode->i_lock protects:
  45 *   - i_dentry, d_alias, d_inode of aliases
  46 * dcache_hash_bucket lock protects:
  47 *   - the dcache hash table
  48 * s_anon bl list spinlock protects:
  49 *   - the s_anon list (see __d_drop)
  50 * dcache_lru_lock protects:
  51 *   - the dcache lru lists and counters
  52 * d_lock protects:
  53 *   - d_flags
  54 *   - d_name
  55 *   - d_lru
  56 *   - d_count
  57 *   - d_unhashed()
  58 *   - d_parent and d_subdirs
  59 *   - childrens' d_child and d_parent
  60 *   - d_alias, d_inode
  61 *
  62 * Ordering:
  63 * dentry->d_inode->i_lock
  64 *   dentry->d_lock
  65 *     dcache_lru_lock
  66 *     dcache_hash_bucket lock
  67 *     s_anon lock
  68 *
  69 * If there is an ancestor relationship:
  70 * dentry->d_parent->...->d_parent->d_lock
  71 *   ...
  72 *     dentry->d_parent->d_lock
  73 *       dentry->d_lock
  74 *
  75 * If no ancestor relationship:
  76 * if (dentry1 < dentry2)
  77 *   dentry1->d_lock
  78 *     dentry2->d_lock
  79 */
  80int sysctl_vfs_cache_pressure __read_mostly = 100;
  81EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  82
  83static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  84__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  85
  86EXPORT_SYMBOL(rename_lock);
  87
  88static struct kmem_cache *dentry_cache __read_mostly;
  89
  90/*
  91 * This is the single most critical data structure when it comes
  92 * to the dcache: the hashtable for lookups. Somebody should try
  93 * to make this good - I've just made it work.
  94 *
  95 * This hash-function tries to avoid losing too many bits of hash
  96 * information, yet avoid using a prime hash-size or similar.
  97 */
  98#define D_HASHBITS     d_hash_shift
  99#define D_HASHMASK     d_hash_mask
 100
 101static unsigned int d_hash_mask __read_mostly;
 102static unsigned int d_hash_shift __read_mostly;
 103
 104static struct hlist_bl_head *dentry_hashtable __read_mostly;
 105
 106static inline struct hlist_bl_head *d_hash(struct dentry *parent,
 107                                        unsigned long hash)
 108{
 109        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
 110        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 111        return dentry_hashtable + (hash & D_HASHMASK);
 112}
 113
 114/* Statistics gathering. */
 115struct dentry_stat_t dentry_stat = {
 116        .age_limit = 45,
 117};
 118
 119static DEFINE_PER_CPU(unsigned int, nr_dentry);
 120
 121#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 122static int get_nr_dentry(void)
 123{
 124        int i;
 125        int sum = 0;
 126        for_each_possible_cpu(i)
 127                sum += per_cpu(nr_dentry, i);
 128        return sum < 0 ? 0 : sum;
 129}
 130
 131int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 132                   size_t *lenp, loff_t *ppos)
 133{
 134        dentry_stat.nr_dentry = get_nr_dentry();
 135        return proc_dointvec(table, write, buffer, lenp, ppos);
 136}
 137#endif
 138
 139static void __d_free(struct rcu_head *head)
 140{
 141        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 142
 143        WARN_ON(!list_empty(&dentry->d_alias));
 144        if (dname_external(dentry))
 145                kfree(dentry->d_name.name);
 146        kmem_cache_free(dentry_cache, dentry); 
 147}
 148
 149/*
 150 * no locks, please.
 151 */
 152static void d_free(struct dentry *dentry)
 153{
 154        BUG_ON(dentry->d_count);
 155        this_cpu_dec(nr_dentry);
 156        if (dentry->d_op && dentry->d_op->d_release)
 157                dentry->d_op->d_release(dentry);
 158
 159        /* if dentry was never visible to RCU, immediate free is OK */
 160        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 161                __d_free(&dentry->d_u.d_rcu);
 162        else
 163                call_rcu(&dentry->d_u.d_rcu, __d_free);
 164}
 165
 166/**
 167 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 168 * @dentry: the target dentry
 169 * After this call, in-progress rcu-walk path lookup will fail. This
 170 * should be called after unhashing, and after changing d_inode (if
 171 * the dentry has not already been unhashed).
 172 */
 173static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 174{
 175        assert_spin_locked(&dentry->d_lock);
 176        /* Go through a barrier */
 177        write_seqcount_barrier(&dentry->d_seq);
 178}
 179
 180/*
 181 * Release the dentry's inode, using the filesystem
 182 * d_iput() operation if defined. Dentry has no refcount
 183 * and is unhashed.
 184 */
 185static void dentry_iput(struct dentry * dentry)
 186        __releases(dentry->d_lock)
 187        __releases(dentry->d_inode->i_lock)
 188{
 189        struct inode *inode = dentry->d_inode;
 190        if (inode) {
 191                dentry->d_inode = NULL;
 192                list_del_init(&dentry->d_alias);
 193                spin_unlock(&dentry->d_lock);
 194                spin_unlock(&inode->i_lock);
 195                if (!inode->i_nlink)
 196                        fsnotify_inoderemove(inode);
 197                if (dentry->d_op && dentry->d_op->d_iput)
 198                        dentry->d_op->d_iput(dentry, inode);
 199                else
 200                        iput(inode);
 201        } else {
 202                spin_unlock(&dentry->d_lock);
 203        }
 204}
 205
 206/*
 207 * Release the dentry's inode, using the filesystem
 208 * d_iput() operation if defined. dentry remains in-use.
 209 */
 210static void dentry_unlink_inode(struct dentry * dentry)
 211        __releases(dentry->d_lock)
 212        __releases(dentry->d_inode->i_lock)
 213{
 214        struct inode *inode = dentry->d_inode;
 215        dentry->d_inode = NULL;
 216        list_del_init(&dentry->d_alias);
 217        dentry_rcuwalk_barrier(dentry);
 218        spin_unlock(&dentry->d_lock);
 219        spin_unlock(&inode->i_lock);
 220        if (!inode->i_nlink)
 221                fsnotify_inoderemove(inode);
 222        if (dentry->d_op && dentry->d_op->d_iput)
 223                dentry->d_op->d_iput(dentry, inode);
 224        else
 225                iput(inode);
 226}
 227
 228/*
 229 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 230 */
 231static void dentry_lru_add(struct dentry *dentry)
 232{
 233        if (list_empty(&dentry->d_lru)) {
 234                spin_lock(&dcache_lru_lock);
 235                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 236                dentry->d_sb->s_nr_dentry_unused++;
 237                dentry_stat.nr_unused++;
 238                spin_unlock(&dcache_lru_lock);
 239        }
 240}
 241
 242static void __dentry_lru_del(struct dentry *dentry)
 243{
 244        list_del_init(&dentry->d_lru);
 245        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 246        dentry->d_sb->s_nr_dentry_unused--;
 247        dentry_stat.nr_unused--;
 248}
 249
 250/*
 251 * Remove a dentry with references from the LRU.
 252 */
 253static void dentry_lru_del(struct dentry *dentry)
 254{
 255        if (!list_empty(&dentry->d_lru)) {
 256                spin_lock(&dcache_lru_lock);
 257                __dentry_lru_del(dentry);
 258                spin_unlock(&dcache_lru_lock);
 259        }
 260}
 261
 262/*
 263 * Remove a dentry that is unreferenced and about to be pruned
 264 * (unhashed and destroyed) from the LRU, and inform the file system.
 265 * This wrapper should be called _prior_ to unhashing a victim dentry.
 266 */
 267static void dentry_lru_prune(struct dentry *dentry)
 268{
 269        if (!list_empty(&dentry->d_lru)) {
 270                if (dentry->d_flags & DCACHE_OP_PRUNE)
 271                        dentry->d_op->d_prune(dentry);
 272
 273                spin_lock(&dcache_lru_lock);
 274                __dentry_lru_del(dentry);
 275                spin_unlock(&dcache_lru_lock);
 276        }
 277}
 278
 279static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 280{
 281        spin_lock(&dcache_lru_lock);
 282        if (list_empty(&dentry->d_lru)) {
 283                list_add_tail(&dentry->d_lru, list);
 284                dentry->d_sb->s_nr_dentry_unused++;
 285                dentry_stat.nr_unused++;
 286        } else {
 287                list_move_tail(&dentry->d_lru, list);
 288        }
 289        spin_unlock(&dcache_lru_lock);
 290}
 291
 292/**
 293 * d_kill - kill dentry and return parent
 294 * @dentry: dentry to kill
 295 * @parent: parent dentry
 296 *
 297 * The dentry must already be unhashed and removed from the LRU.
 298 *
 299 * If this is the root of the dentry tree, return NULL.
 300 *
 301 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 302 * d_kill.
 303 */
 304static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 305        __releases(dentry->d_lock)
 306        __releases(parent->d_lock)
 307        __releases(dentry->d_inode->i_lock)
 308{
 309        list_del(&dentry->d_u.d_child);
 310        /*
 311         * Inform try_to_ascend() that we are no longer attached to the
 312         * dentry tree
 313         */
 314        dentry->d_flags |= DCACHE_DISCONNECTED;
 315        if (parent)
 316                spin_unlock(&parent->d_lock);
 317        dentry_iput(dentry);
 318        /*
 319         * dentry_iput drops the locks, at which point nobody (except
 320         * transient RCU lookups) can reach this dentry.
 321         */
 322        d_free(dentry);
 323        return parent;
 324}
 325
 326/*
 327 * Unhash a dentry without inserting an RCU walk barrier or checking that
 328 * dentry->d_lock is locked.  The caller must take care of that, if
 329 * appropriate.
 330 */
 331static void __d_shrink(struct dentry *dentry)
 332{
 333        if (!d_unhashed(dentry)) {
 334                struct hlist_bl_head *b;
 335                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 336                        b = &dentry->d_sb->s_anon;
 337                else
 338                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 339
 340                hlist_bl_lock(b);
 341                __hlist_bl_del(&dentry->d_hash);
 342                dentry->d_hash.pprev = NULL;
 343                hlist_bl_unlock(b);
 344        }
 345}
 346
 347/**
 348 * d_drop - drop a dentry
 349 * @dentry: dentry to drop
 350 *
 351 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 352 * be found through a VFS lookup any more. Note that this is different from
 353 * deleting the dentry - d_delete will try to mark the dentry negative if
 354 * possible, giving a successful _negative_ lookup, while d_drop will
 355 * just make the cache lookup fail.
 356 *
 357 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 358 * reason (NFS timeouts or autofs deletes).
 359 *
 360 * __d_drop requires dentry->d_lock.
 361 */
 362void __d_drop(struct dentry *dentry)
 363{
 364        if (!d_unhashed(dentry)) {
 365                __d_shrink(dentry);
 366                dentry_rcuwalk_barrier(dentry);
 367        }
 368}
 369EXPORT_SYMBOL(__d_drop);
 370
 371void d_drop(struct dentry *dentry)
 372{
 373        spin_lock(&dentry->d_lock);
 374        __d_drop(dentry);
 375        spin_unlock(&dentry->d_lock);
 376}
 377EXPORT_SYMBOL(d_drop);
 378
 379/*
 380 * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
 381 * @dentry: dentry to drop
 382 *
 383 * This is called when we do a lookup on a placeholder dentry that needed to be
 384 * looked up.  The dentry should have been hashed in order for it to be found by
 385 * the lookup code, but now needs to be unhashed while we do the actual lookup
 386 * and clear the DCACHE_NEED_LOOKUP flag.
 387 */
 388void d_clear_need_lookup(struct dentry *dentry)
 389{
 390        spin_lock(&dentry->d_lock);
 391        __d_drop(dentry);
 392        dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
 393        spin_unlock(&dentry->d_lock);
 394}
 395EXPORT_SYMBOL(d_clear_need_lookup);
 396
 397/*
 398 * Finish off a dentry we've decided to kill.
 399 * dentry->d_lock must be held, returns with it unlocked.
 400 * If ref is non-zero, then decrement the refcount too.
 401 * Returns dentry requiring refcount drop, or NULL if we're done.
 402 */
 403static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 404        __releases(dentry->d_lock)
 405{
 406        struct inode *inode;
 407        struct dentry *parent;
 408
 409        inode = dentry->d_inode;
 410        if (inode && !spin_trylock(&inode->i_lock)) {
 411relock:
 412                spin_unlock(&dentry->d_lock);
 413                cpu_relax();
 414                return dentry; /* try again with same dentry */
 415        }
 416        if (IS_ROOT(dentry))
 417                parent = NULL;
 418        else
 419                parent = dentry->d_parent;
 420        if (parent && !spin_trylock(&parent->d_lock)) {
 421                if (inode)
 422                        spin_unlock(&inode->i_lock);
 423                goto relock;
 424        }
 425
 426        if (ref)
 427                dentry->d_count--;
 428        /*
 429         * if dentry was on the d_lru list delete it from there.
 430         * inform the fs via d_prune that this dentry is about to be
 431         * unhashed and destroyed.
 432         */
 433        dentry_lru_prune(dentry);
 434        /* if it was on the hash then remove it */
 435        __d_drop(dentry);
 436        return d_kill(dentry, parent);
 437}
 438
 439/* 
 440 * This is dput
 441 *
 442 * This is complicated by the fact that we do not want to put
 443 * dentries that are no longer on any hash chain on the unused
 444 * list: we'd much rather just get rid of them immediately.
 445 *
 446 * However, that implies that we have to traverse the dentry
 447 * tree upwards to the parents which might _also_ now be
 448 * scheduled for deletion (it may have been only waiting for
 449 * its last child to go away).
 450 *
 451 * This tail recursion is done by hand as we don't want to depend
 452 * on the compiler to always get this right (gcc generally doesn't).
 453 * Real recursion would eat up our stack space.
 454 */
 455
 456/*
 457 * dput - release a dentry
 458 * @dentry: dentry to release 
 459 *
 460 * Release a dentry. This will drop the usage count and if appropriate
 461 * call the dentry unlink method as well as removing it from the queues and
 462 * releasing its resources. If the parent dentries were scheduled for release
 463 * they too may now get deleted.
 464 */
 465void dput(struct dentry *dentry)
 466{
 467        if (!dentry)
 468                return;
 469
 470repeat:
 471        if (dentry->d_count == 1)
 472                might_sleep();
 473        spin_lock(&dentry->d_lock);
 474        BUG_ON(!dentry->d_count);
 475        if (dentry->d_count > 1) {
 476                dentry->d_count--;
 477                spin_unlock(&dentry->d_lock);
 478                return;
 479        }
 480
 481        if (dentry->d_flags & DCACHE_OP_DELETE) {
 482                if (dentry->d_op->d_delete(dentry))
 483                        goto kill_it;
 484        }
 485
 486        /* Unreachable? Get rid of it */
 487        if (d_unhashed(dentry))
 488                goto kill_it;
 489
 490        /*
 491         * If this dentry needs lookup, don't set the referenced flag so that it
 492         * is more likely to be cleaned up by the dcache shrinker in case of
 493         * memory pressure.
 494         */
 495        if (!d_need_lookup(dentry))
 496                dentry->d_flags |= DCACHE_REFERENCED;
 497        dentry_lru_add(dentry);
 498
 499        dentry->d_count--;
 500        spin_unlock(&dentry->d_lock);
 501        return;
 502
 503kill_it:
 504        dentry = dentry_kill(dentry, 1);
 505        if (dentry)
 506                goto repeat;
 507}
 508EXPORT_SYMBOL(dput);
 509
 510/**
 511 * d_invalidate - invalidate a dentry
 512 * @dentry: dentry to invalidate
 513 *
 514 * Try to invalidate the dentry if it turns out to be
 515 * possible. If there are other dentries that can be
 516 * reached through this one we can't delete it and we
 517 * return -EBUSY. On success we return 0.
 518 *
 519 * no dcache lock.
 520 */
 521 
 522int d_invalidate(struct dentry * dentry)
 523{
 524        /*
 525         * If it's already been dropped, return OK.
 526         */
 527        spin_lock(&dentry->d_lock);
 528        if (d_unhashed(dentry)) {
 529                spin_unlock(&dentry->d_lock);
 530                return 0;
 531        }
 532        /*
 533         * Check whether to do a partial shrink_dcache
 534         * to get rid of unused child entries.
 535         */
 536        if (!list_empty(&dentry->d_subdirs)) {
 537                spin_unlock(&dentry->d_lock);
 538                shrink_dcache_parent(dentry);
 539                spin_lock(&dentry->d_lock);
 540        }
 541
 542        /*
 543         * Somebody else still using it?
 544         *
 545         * If it's a directory, we can't drop it
 546         * for fear of somebody re-populating it
 547         * with children (even though dropping it
 548         * would make it unreachable from the root,
 549         * we might still populate it if it was a
 550         * working directory or similar).
 551         * We also need to leave mountpoints alone,
 552         * directory or not.
 553         */
 554        if (dentry->d_count > 1 && dentry->d_inode) {
 555                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 556                        spin_unlock(&dentry->d_lock);
 557                        return -EBUSY;
 558                }
 559        }
 560
 561        __d_drop(dentry);
 562        spin_unlock(&dentry->d_lock);
 563        return 0;
 564}
 565EXPORT_SYMBOL(d_invalidate);
 566
 567/* This must be called with d_lock held */
 568static inline void __dget_dlock(struct dentry *dentry)
 569{
 570        dentry->d_count++;
 571}
 572
 573static inline void __dget(struct dentry *dentry)
 574{
 575        spin_lock(&dentry->d_lock);
 576        __dget_dlock(dentry);
 577        spin_unlock(&dentry->d_lock);
 578}
 579
 580struct dentry *dget_parent(struct dentry *dentry)
 581{
 582        struct dentry *ret;
 583
 584repeat:
 585        /*
 586         * Don't need rcu_dereference because we re-check it was correct under
 587         * the lock.
 588         */
 589        rcu_read_lock();
 590        ret = dentry->d_parent;
 591        spin_lock(&ret->d_lock);
 592        if (unlikely(ret != dentry->d_parent)) {
 593                spin_unlock(&ret->d_lock);
 594                rcu_read_unlock();
 595                goto repeat;
 596        }
 597        rcu_read_unlock();
 598        BUG_ON(!ret->d_count);
 599        ret->d_count++;
 600        spin_unlock(&ret->d_lock);
 601        return ret;
 602}
 603EXPORT_SYMBOL(dget_parent);
 604
 605/**
 606 * d_find_alias - grab a hashed alias of inode
 607 * @inode: inode in question
 608 * @want_discon:  flag, used by d_splice_alias, to request
 609 *          that only a DISCONNECTED alias be returned.
 610 *
 611 * If inode has a hashed alias, or is a directory and has any alias,
 612 * acquire the reference to alias and return it. Otherwise return NULL.
 613 * Notice that if inode is a directory there can be only one alias and
 614 * it can be unhashed only if it has no children, or if it is the root
 615 * of a filesystem.
 616 *
 617 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 618 * any other hashed alias over that one unless @want_discon is set,
 619 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 620 */
 621static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 622{
 623        struct dentry *alias, *discon_alias;
 624
 625again:
 626        discon_alias = NULL;
 627        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 628                spin_lock(&alias->d_lock);
 629                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 630                        if (IS_ROOT(alias) &&
 631                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 632                                discon_alias = alias;
 633                        } else if (!want_discon) {
 634                                __dget_dlock(alias);
 635                                spin_unlock(&alias->d_lock);
 636                                return alias;
 637                        }
 638                }
 639                spin_unlock(&alias->d_lock);
 640        }
 641        if (discon_alias) {
 642                alias = discon_alias;
 643                spin_lock(&alias->d_lock);
 644                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 645                        if (IS_ROOT(alias) &&
 646                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 647                                __dget_dlock(alias);
 648                                spin_unlock(&alias->d_lock);
 649                                return alias;
 650                        }
 651                }
 652                spin_unlock(&alias->d_lock);
 653                goto again;
 654        }
 655        return NULL;
 656}
 657
 658struct dentry *d_find_alias(struct inode *inode)
 659{
 660        struct dentry *de = NULL;
 661
 662        if (!list_empty(&inode->i_dentry)) {
 663                spin_lock(&inode->i_lock);
 664                de = __d_find_alias(inode, 0);
 665                spin_unlock(&inode->i_lock);
 666        }
 667        return de;
 668}
 669EXPORT_SYMBOL(d_find_alias);
 670
 671/*
 672 *      Try to kill dentries associated with this inode.
 673 * WARNING: you must own a reference to inode.
 674 */
 675void d_prune_aliases(struct inode *inode)
 676{
 677        struct dentry *dentry;
 678restart:
 679        spin_lock(&inode->i_lock);
 680        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 681                spin_lock(&dentry->d_lock);
 682                if (!dentry->d_count) {
 683                        __dget_dlock(dentry);
 684                        __d_drop(dentry);
 685                        spin_unlock(&dentry->d_lock);
 686                        spin_unlock(&inode->i_lock);
 687                        dput(dentry);
 688                        goto restart;
 689                }
 690                spin_unlock(&dentry->d_lock);
 691        }
 692        spin_unlock(&inode->i_lock);
 693}
 694EXPORT_SYMBOL(d_prune_aliases);
 695
 696/*
 697 * Try to throw away a dentry - free the inode, dput the parent.
 698 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 699 * Releases dentry->d_lock.
 700 *
 701 * This may fail if locks cannot be acquired no problem, just try again.
 702 */
 703static void try_prune_one_dentry(struct dentry *dentry)
 704        __releases(dentry->d_lock)
 705{
 706        struct dentry *parent;
 707
 708        parent = dentry_kill(dentry, 0);
 709        /*
 710         * If dentry_kill returns NULL, we have nothing more to do.
 711         * if it returns the same dentry, trylocks failed. In either
 712         * case, just loop again.
 713         *
 714         * Otherwise, we need to prune ancestors too. This is necessary
 715         * to prevent quadratic behavior of shrink_dcache_parent(), but
 716         * is also expected to be beneficial in reducing dentry cache
 717         * fragmentation.
 718         */
 719        if (!parent)
 720                return;
 721        if (parent == dentry)
 722                return;
 723
 724        /* Prune ancestors. */
 725        dentry = parent;
 726        while (dentry) {
 727                spin_lock(&dentry->d_lock);
 728                if (dentry->d_count > 1) {
 729                        dentry->d_count--;
 730                        spin_unlock(&dentry->d_lock);
 731                        return;
 732                }
 733                dentry = dentry_kill(dentry, 1);
 734        }
 735}
 736
 737static void shrink_dentry_list(struct list_head *list)
 738{
 739        struct dentry *dentry;
 740
 741        rcu_read_lock();
 742        for (;;) {
 743                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 744                if (&dentry->d_lru == list)
 745                        break; /* empty */
 746                spin_lock(&dentry->d_lock);
 747                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 748                        spin_unlock(&dentry->d_lock);
 749                        continue;
 750                }
 751
 752                /*
 753                 * We found an inuse dentry which was not removed from
 754                 * the LRU because of laziness during lookup.  Do not free
 755                 * it - just keep it off the LRU list.
 756                 */
 757                if (dentry->d_count) {
 758                        dentry_lru_del(dentry);
 759                        spin_unlock(&dentry->d_lock);
 760                        continue;
 761                }
 762
 763                rcu_read_unlock();
 764
 765                try_prune_one_dentry(dentry);
 766
 767                rcu_read_lock();
 768        }
 769        rcu_read_unlock();
 770}
 771
 772/**
 773 * prune_dcache_sb - shrink the dcache
 774 * @sb: superblock
 775 * @count: number of entries to try to free
 776 *
 777 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 778 * done when we need more memory an called from the superblock shrinker
 779 * function.
 780 *
 781 * This function may fail to free any resources if all the dentries are in
 782 * use.
 783 */
 784void prune_dcache_sb(struct super_block *sb, int count)
 785{
 786        struct dentry *dentry;
 787        LIST_HEAD(referenced);
 788        LIST_HEAD(tmp);
 789
 790relock:
 791        spin_lock(&dcache_lru_lock);
 792        while (!list_empty(&sb->s_dentry_lru)) {
 793                dentry = list_entry(sb->s_dentry_lru.prev,
 794                                struct dentry, d_lru);
 795                BUG_ON(dentry->d_sb != sb);
 796
 797                if (!spin_trylock(&dentry->d_lock)) {
 798                        spin_unlock(&dcache_lru_lock);
 799                        cpu_relax();
 800                        goto relock;
 801                }
 802
 803                if (dentry->d_flags & DCACHE_REFERENCED) {
 804                        dentry->d_flags &= ~DCACHE_REFERENCED;
 805                        list_move(&dentry->d_lru, &referenced);
 806                        spin_unlock(&dentry->d_lock);
 807                } else {
 808                        list_move_tail(&dentry->d_lru, &tmp);
 809                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 810                        spin_unlock(&dentry->d_lock);
 811                        if (!--count)
 812                                break;
 813                }
 814                cond_resched_lock(&dcache_lru_lock);
 815        }
 816        if (!list_empty(&referenced))
 817                list_splice(&referenced, &sb->s_dentry_lru);
 818        spin_unlock(&dcache_lru_lock);
 819
 820        shrink_dentry_list(&tmp);
 821}
 822
 823/**
 824 * shrink_dcache_sb - shrink dcache for a superblock
 825 * @sb: superblock
 826 *
 827 * Shrink the dcache for the specified super block. This is used to free
 828 * the dcache before unmounting a file system.
 829 */
 830void shrink_dcache_sb(struct super_block *sb)
 831{
 832        LIST_HEAD(tmp);
 833
 834        spin_lock(&dcache_lru_lock);
 835        while (!list_empty(&sb->s_dentry_lru)) {
 836                list_splice_init(&sb->s_dentry_lru, &tmp);
 837                spin_unlock(&dcache_lru_lock);
 838                shrink_dentry_list(&tmp);
 839                spin_lock(&dcache_lru_lock);
 840        }
 841        spin_unlock(&dcache_lru_lock);
 842}
 843EXPORT_SYMBOL(shrink_dcache_sb);
 844
 845/*
 846 * destroy a single subtree of dentries for unmount
 847 * - see the comments on shrink_dcache_for_umount() for a description of the
 848 *   locking
 849 */
 850static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 851{
 852        struct dentry *parent;
 853
 854        BUG_ON(!IS_ROOT(dentry));
 855
 856        for (;;) {
 857                /* descend to the first leaf in the current subtree */
 858                while (!list_empty(&dentry->d_subdirs))
 859                        dentry = list_entry(dentry->d_subdirs.next,
 860                                            struct dentry, d_u.d_child);
 861
 862                /* consume the dentries from this leaf up through its parents
 863                 * until we find one with children or run out altogether */
 864                do {
 865                        struct inode *inode;
 866
 867                        /*
 868                         * remove the dentry from the lru, and inform
 869                         * the fs that this dentry is about to be
 870                         * unhashed and destroyed.
 871                         */
 872                        dentry_lru_prune(dentry);
 873                        __d_shrink(dentry);
 874
 875                        if (dentry->d_count != 0) {
 876                                printk(KERN_ERR
 877                                       "BUG: Dentry %p{i=%lx,n=%s}"
 878                                       " still in use (%d)"
 879                                       " [unmount of %s %s]\n",
 880                                       dentry,
 881                                       dentry->d_inode ?
 882                                       dentry->d_inode->i_ino : 0UL,
 883                                       dentry->d_name.name,
 884                                       dentry->d_count,
 885                                       dentry->d_sb->s_type->name,
 886                                       dentry->d_sb->s_id);
 887                                BUG();
 888                        }
 889
 890                        if (IS_ROOT(dentry)) {
 891                                parent = NULL;
 892                                list_del(&dentry->d_u.d_child);
 893                        } else {
 894                                parent = dentry->d_parent;
 895                                parent->d_count--;
 896                                list_del(&dentry->d_u.d_child);
 897                        }
 898
 899                        inode = dentry->d_inode;
 900                        if (inode) {
 901                                dentry->d_inode = NULL;
 902                                list_del_init(&dentry->d_alias);
 903                                if (dentry->d_op && dentry->d_op->d_iput)
 904                                        dentry->d_op->d_iput(dentry, inode);
 905                                else
 906                                        iput(inode);
 907                        }
 908
 909                        d_free(dentry);
 910
 911                        /* finished when we fall off the top of the tree,
 912                         * otherwise we ascend to the parent and move to the
 913                         * next sibling if there is one */
 914                        if (!parent)
 915                                return;
 916                        dentry = parent;
 917                } while (list_empty(&dentry->d_subdirs));
 918
 919                dentry = list_entry(dentry->d_subdirs.next,
 920                                    struct dentry, d_u.d_child);
 921        }
 922}
 923
 924/*
 925 * destroy the dentries attached to a superblock on unmounting
 926 * - we don't need to use dentry->d_lock because:
 927 *   - the superblock is detached from all mountings and open files, so the
 928 *     dentry trees will not be rearranged by the VFS
 929 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 930 *     any dentries belonging to this superblock that it comes across
 931 *   - the filesystem itself is no longer permitted to rearrange the dentries
 932 *     in this superblock
 933 */
 934void shrink_dcache_for_umount(struct super_block *sb)
 935{
 936        struct dentry *dentry;
 937
 938        if (down_read_trylock(&sb->s_umount))
 939                BUG();
 940
 941        dentry = sb->s_root;
 942        sb->s_root = NULL;
 943        dentry->d_count--;
 944        shrink_dcache_for_umount_subtree(dentry);
 945
 946        while (!hlist_bl_empty(&sb->s_anon)) {
 947                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 948                shrink_dcache_for_umount_subtree(dentry);
 949        }
 950}
 951
 952/*
 953 * This tries to ascend one level of parenthood, but
 954 * we can race with renaming, so we need to re-check
 955 * the parenthood after dropping the lock and check
 956 * that the sequence number still matches.
 957 */
 958static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
 959{
 960        struct dentry *new = old->d_parent;
 961
 962        rcu_read_lock();
 963        spin_unlock(&old->d_lock);
 964        spin_lock(&new->d_lock);
 965
 966        /*
 967         * might go back up the wrong parent if we have had a rename
 968         * or deletion
 969         */
 970        if (new != old->d_parent ||
 971                 (old->d_flags & DCACHE_DISCONNECTED) ||
 972                 (!locked && read_seqretry(&rename_lock, seq))) {
 973                spin_unlock(&new->d_lock);
 974                new = NULL;
 975        }
 976        rcu_read_unlock();
 977        return new;
 978}
 979
 980
 981/*
 982 * Search for at least 1 mount point in the dentry's subdirs.
 983 * We descend to the next level whenever the d_subdirs
 984 * list is non-empty and continue searching.
 985 */
 986 
 987/**
 988 * have_submounts - check for mounts over a dentry
 989 * @parent: dentry to check.
 990 *
 991 * Return true if the parent or its subdirectories contain
 992 * a mount point
 993 */
 994int have_submounts(struct dentry *parent)
 995{
 996        struct dentry *this_parent;
 997        struct list_head *next;
 998        unsigned seq;
 999        int locked = 0;
1000
1001        seq = read_seqbegin(&rename_lock);
1002again:
1003        this_parent = parent;
1004
1005        if (d_mountpoint(parent))
1006                goto positive;
1007        spin_lock(&this_parent->d_lock);
1008repeat:
1009        next = this_parent->d_subdirs.next;
1010resume:
1011        while (next != &this_parent->d_subdirs) {
1012                struct list_head *tmp = next;
1013                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1014                next = tmp->next;
1015
1016                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1017                /* Have we found a mount point ? */
1018                if (d_mountpoint(dentry)) {
1019                        spin_unlock(&dentry->d_lock);
1020                        spin_unlock(&this_parent->d_lock);
1021                        goto positive;
1022                }
1023                if (!list_empty(&dentry->d_subdirs)) {
1024                        spin_unlock(&this_parent->d_lock);
1025                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1026                        this_parent = dentry;
1027                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1028                        goto repeat;
1029                }
1030                spin_unlock(&dentry->d_lock);
1031        }
1032        /*
1033         * All done at this level ... ascend and resume the search.
1034         */
1035        if (this_parent != parent) {
1036                struct dentry *child = this_parent;
1037                this_parent = try_to_ascend(this_parent, locked, seq);
1038                if (!this_parent)
1039                        goto rename_retry;
1040                next = child->d_u.d_child.next;
1041                goto resume;
1042        }
1043        spin_unlock(&this_parent->d_lock);
1044        if (!locked && read_seqretry(&rename_lock, seq))
1045                goto rename_retry;
1046        if (locked)
1047                write_sequnlock(&rename_lock);
1048        return 0; /* No mount points found in tree */
1049positive:
1050        if (!locked && read_seqretry(&rename_lock, seq))
1051                goto rename_retry;
1052        if (locked)
1053                write_sequnlock(&rename_lock);
1054        return 1;
1055
1056rename_retry:
1057        locked = 1;
1058        write_seqlock(&rename_lock);
1059        goto again;
1060}
1061EXPORT_SYMBOL(have_submounts);
1062
1063/*
1064 * Search the dentry child list for the specified parent,
1065 * and move any unused dentries to the end of the unused
1066 * list for prune_dcache(). We descend to the next level
1067 * whenever the d_subdirs list is non-empty and continue
1068 * searching.
1069 *
1070 * It returns zero iff there are no unused children,
1071 * otherwise  it returns the number of children moved to
1072 * the end of the unused list. This may not be the total
1073 * number of unused children, because select_parent can
1074 * drop the lock and return early due to latency
1075 * constraints.
1076 */
1077static int select_parent(struct dentry *parent, struct list_head *dispose)
1078{
1079        struct dentry *this_parent;
1080        struct list_head *next;
1081        unsigned seq;
1082        int found = 0;
1083        int locked = 0;
1084
1085        seq = read_seqbegin(&rename_lock);
1086again:
1087        this_parent = parent;
1088        spin_lock(&this_parent->d_lock);
1089repeat:
1090        next = this_parent->d_subdirs.next;
1091resume:
1092        while (next != &this_parent->d_subdirs) {
1093                struct list_head *tmp = next;
1094                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1095                next = tmp->next;
1096
1097                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1098
1099                /*
1100                 * move only zero ref count dentries to the dispose list.
1101                 *
1102                 * Those which are presently on the shrink list, being processed
1103                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1104                 * loop in shrink_dcache_parent() might not make any progress
1105                 * and loop forever.
1106                 */
1107                if (dentry->d_count) {
1108                        dentry_lru_del(dentry);
1109                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1110                        dentry_lru_move_list(dentry, dispose);
1111                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1112                        found++;
1113                }
1114                /*
1115                 * We can return to the caller if we have found some (this
1116                 * ensures forward progress). We'll be coming back to find
1117                 * the rest.
1118                 */
1119                if (found && need_resched()) {
1120                        spin_unlock(&dentry->d_lock);
1121                        goto out;
1122                }
1123
1124                /*
1125                 * Descend a level if the d_subdirs list is non-empty.
1126                 */
1127                if (!list_empty(&dentry->d_subdirs)) {
1128                        spin_unlock(&this_parent->d_lock);
1129                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1130                        this_parent = dentry;
1131                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1132                        goto repeat;
1133                }
1134
1135                spin_unlock(&dentry->d_lock);
1136        }
1137        /*
1138         * All done at this level ... ascend and resume the search.
1139         */
1140        if (this_parent != parent) {
1141                struct dentry *child = this_parent;
1142                this_parent = try_to_ascend(this_parent, locked, seq);
1143                if (!this_parent)
1144                        goto rename_retry;
1145                next = child->d_u.d_child.next;
1146                goto resume;
1147        }
1148out:
1149        spin_unlock(&this_parent->d_lock);
1150        if (!locked && read_seqretry(&rename_lock, seq))
1151                goto rename_retry;
1152        if (locked)
1153                write_sequnlock(&rename_lock);
1154        return found;
1155
1156rename_retry:
1157        if (found)
1158                return found;
1159        locked = 1;
1160        write_seqlock(&rename_lock);
1161        goto again;
1162}
1163
1164/**
1165 * shrink_dcache_parent - prune dcache
1166 * @parent: parent of entries to prune
1167 *
1168 * Prune the dcache to remove unused children of the parent dentry.
1169 */
1170void shrink_dcache_parent(struct dentry * parent)
1171{
1172        LIST_HEAD(dispose);
1173        int found;
1174
1175        while ((found = select_parent(parent, &dispose)) != 0)
1176                shrink_dentry_list(&dispose);
1177}
1178EXPORT_SYMBOL(shrink_dcache_parent);
1179
1180/**
1181 * __d_alloc    -       allocate a dcache entry
1182 * @sb: filesystem it will belong to
1183 * @name: qstr of the name
1184 *
1185 * Allocates a dentry. It returns %NULL if there is insufficient memory
1186 * available. On a success the dentry is returned. The name passed in is
1187 * copied and the copy passed in may be reused after this call.
1188 */
1189 
1190struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1191{
1192        struct dentry *dentry;
1193        char *dname;
1194
1195        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1196        if (!dentry)
1197                return NULL;
1198
1199        if (name->len > DNAME_INLINE_LEN-1) {
1200                dname = kmalloc(name->len + 1, GFP_KERNEL);
1201                if (!dname) {
1202                        kmem_cache_free(dentry_cache, dentry); 
1203                        return NULL;
1204                }
1205        } else  {
1206                dname = dentry->d_iname;
1207        }       
1208        dentry->d_name.name = dname;
1209
1210        dentry->d_name.len = name->len;
1211        dentry->d_name.hash = name->hash;
1212        memcpy(dname, name->name, name->len);
1213        dname[name->len] = 0;
1214
1215        dentry->d_count = 1;
1216        dentry->d_flags = 0;
1217        spin_lock_init(&dentry->d_lock);
1218        seqcount_init(&dentry->d_seq);
1219        dentry->d_inode = NULL;
1220        dentry->d_parent = dentry;
1221        dentry->d_sb = sb;
1222        dentry->d_op = NULL;
1223        dentry->d_fsdata = NULL;
1224        INIT_HLIST_BL_NODE(&dentry->d_hash);
1225        INIT_LIST_HEAD(&dentry->d_lru);
1226        INIT_LIST_HEAD(&dentry->d_subdirs);
1227        INIT_LIST_HEAD(&dentry->d_alias);
1228        INIT_LIST_HEAD(&dentry->d_u.d_child);
1229        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1230
1231        this_cpu_inc(nr_dentry);
1232
1233        return dentry;
1234}
1235
1236/**
1237 * d_alloc      -       allocate a dcache entry
1238 * @parent: parent of entry to allocate
1239 * @name: qstr of the name
1240 *
1241 * Allocates a dentry. It returns %NULL if there is insufficient memory
1242 * available. On a success the dentry is returned. The name passed in is
1243 * copied and the copy passed in may be reused after this call.
1244 */
1245struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1246{
1247        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1248        if (!dentry)
1249                return NULL;
1250
1251        spin_lock(&parent->d_lock);
1252        /*
1253         * don't need child lock because it is not subject
1254         * to concurrency here
1255         */
1256        __dget_dlock(parent);
1257        dentry->d_parent = parent;
1258        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1259        spin_unlock(&parent->d_lock);
1260
1261        return dentry;
1262}
1263EXPORT_SYMBOL(d_alloc);
1264
1265struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1266{
1267        struct dentry *dentry = __d_alloc(sb, name);
1268        if (dentry)
1269                dentry->d_flags |= DCACHE_DISCONNECTED;
1270        return dentry;
1271}
1272EXPORT_SYMBOL(d_alloc_pseudo);
1273
1274struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1275{
1276        struct qstr q;
1277
1278        q.name = name;
1279        q.len = strlen(name);
1280        q.hash = full_name_hash(q.name, q.len);
1281        return d_alloc(parent, &q);
1282}
1283EXPORT_SYMBOL(d_alloc_name);
1284
1285void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1286{
1287        WARN_ON_ONCE(dentry->d_op);
1288        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1289                                DCACHE_OP_COMPARE       |
1290                                DCACHE_OP_REVALIDATE    |
1291                                DCACHE_OP_DELETE ));
1292        dentry->d_op = op;
1293        if (!op)
1294                return;
1295        if (op->d_hash)
1296                dentry->d_flags |= DCACHE_OP_HASH;
1297        if (op->d_compare)
1298                dentry->d_flags |= DCACHE_OP_COMPARE;
1299        if (op->d_revalidate)
1300                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1301        if (op->d_delete)
1302                dentry->d_flags |= DCACHE_OP_DELETE;
1303        if (op->d_prune)
1304                dentry->d_flags |= DCACHE_OP_PRUNE;
1305
1306}
1307EXPORT_SYMBOL(d_set_d_op);
1308
1309static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1310{
1311        spin_lock(&dentry->d_lock);
1312        if (inode) {
1313                if (unlikely(IS_AUTOMOUNT(inode)))
1314                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1315                list_add(&dentry->d_alias, &inode->i_dentry);
1316        }
1317        dentry->d_inode = inode;
1318        dentry_rcuwalk_barrier(dentry);
1319        spin_unlock(&dentry->d_lock);
1320        fsnotify_d_instantiate(dentry, inode);
1321}
1322
1323/**
1324 * d_instantiate - fill in inode information for a dentry
1325 * @entry: dentry to complete
1326 * @inode: inode to attach to this dentry
1327 *
1328 * Fill in inode information in the entry.
1329 *
1330 * This turns negative dentries into productive full members
1331 * of society.
1332 *
1333 * NOTE! This assumes that the inode count has been incremented
1334 * (or otherwise set) by the caller to indicate that it is now
1335 * in use by the dcache.
1336 */
1337 
1338void d_instantiate(struct dentry *entry, struct inode * inode)
1339{
1340        BUG_ON(!list_empty(&entry->d_alias));
1341        if (inode)
1342                spin_lock(&inode->i_lock);
1343        __d_instantiate(entry, inode);
1344        if (inode)
1345                spin_unlock(&inode->i_lock);
1346        security_d_instantiate(entry, inode);
1347}
1348EXPORT_SYMBOL(d_instantiate);
1349
1350/**
1351 * d_instantiate_unique - instantiate a non-aliased dentry
1352 * @entry: dentry to instantiate
1353 * @inode: inode to attach to this dentry
1354 *
1355 * Fill in inode information in the entry. On success, it returns NULL.
1356 * If an unhashed alias of "entry" already exists, then we return the
1357 * aliased dentry instead and drop one reference to inode.
1358 *
1359 * Note that in order to avoid conflicts with rename() etc, the caller
1360 * had better be holding the parent directory semaphore.
1361 *
1362 * This also assumes that the inode count has been incremented
1363 * (or otherwise set) by the caller to indicate that it is now
1364 * in use by the dcache.
1365 */
1366static struct dentry *__d_instantiate_unique(struct dentry *entry,
1367                                             struct inode *inode)
1368{
1369        struct dentry *alias;
1370        int len = entry->d_name.len;
1371        const char *name = entry->d_name.name;
1372        unsigned int hash = entry->d_name.hash;
1373
1374        if (!inode) {
1375                __d_instantiate(entry, NULL);
1376                return NULL;
1377        }
1378
1379        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1380                struct qstr *qstr = &alias->d_name;
1381
1382                /*
1383                 * Don't need alias->d_lock here, because aliases with
1384                 * d_parent == entry->d_parent are not subject to name or
1385                 * parent changes, because the parent inode i_mutex is held.
1386                 */
1387                if (qstr->hash != hash)
1388                        continue;
1389                if (alias->d_parent != entry->d_parent)
1390                        continue;
1391                if (dentry_cmp(qstr->name, qstr->len, name, len))
1392                        continue;
1393                __dget(alias);
1394                return alias;
1395        }
1396
1397        __d_instantiate(entry, inode);
1398        return NULL;
1399}
1400
1401struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1402{
1403        struct dentry *result;
1404
1405        BUG_ON(!list_empty(&entry->d_alias));
1406
1407        if (inode)
1408                spin_lock(&inode->i_lock);
1409        result = __d_instantiate_unique(entry, inode);
1410        if (inode)
1411                spin_unlock(&inode->i_lock);
1412
1413        if (!result) {
1414                security_d_instantiate(entry, inode);
1415                return NULL;
1416        }
1417
1418        BUG_ON(!d_unhashed(result));
1419        iput(inode);
1420        return result;
1421}
1422
1423EXPORT_SYMBOL(d_instantiate_unique);
1424
1425/**
1426 * d_alloc_root - allocate root dentry
1427 * @root_inode: inode to allocate the root for
1428 *
1429 * Allocate a root ("/") dentry for the inode given. The inode is
1430 * instantiated and returned. %NULL is returned if there is insufficient
1431 * memory or the inode passed is %NULL.
1432 */
1433 
1434struct dentry * d_alloc_root(struct inode * root_inode)
1435{
1436        struct dentry *res = NULL;
1437
1438        if (root_inode) {
1439                static const struct qstr name = { .name = "/", .len = 1 };
1440
1441                res = __d_alloc(root_inode->i_sb, &name);
1442                if (res)
1443                        d_instantiate(res, root_inode);
1444        }
1445        return res;
1446}
1447EXPORT_SYMBOL(d_alloc_root);
1448
1449static struct dentry * __d_find_any_alias(struct inode *inode)
1450{
1451        struct dentry *alias;
1452
1453        if (list_empty(&inode->i_dentry))
1454                return NULL;
1455        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1456        __dget(alias);
1457        return alias;
1458}
1459
1460static struct dentry * d_find_any_alias(struct inode *inode)
1461{
1462        struct dentry *de;
1463
1464        spin_lock(&inode->i_lock);
1465        de = __d_find_any_alias(inode);
1466        spin_unlock(&inode->i_lock);
1467        return de;
1468}
1469
1470
1471/**
1472 * d_obtain_alias - find or allocate a dentry for a given inode
1473 * @inode: inode to allocate the dentry for
1474 *
1475 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1476 * similar open by handle operations.  The returned dentry may be anonymous,
1477 * or may have a full name (if the inode was already in the cache).
1478 *
1479 * When called on a directory inode, we must ensure that the inode only ever
1480 * has one dentry.  If a dentry is found, that is returned instead of
1481 * allocating a new one.
1482 *
1483 * On successful return, the reference to the inode has been transferred
1484 * to the dentry.  In case of an error the reference on the inode is released.
1485 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1486 * be passed in and will be the error will be propagate to the return value,
1487 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1488 */
1489struct dentry *d_obtain_alias(struct inode *inode)
1490{
1491        static const struct qstr anonstring = { .name = "" };
1492        struct dentry *tmp;
1493        struct dentry *res;
1494
1495        if (!inode)
1496                return ERR_PTR(-ESTALE);
1497        if (IS_ERR(inode))
1498                return ERR_CAST(inode);
1499
1500        res = d_find_any_alias(inode);
1501        if (res)
1502                goto out_iput;
1503
1504        tmp = __d_alloc(inode->i_sb, &anonstring);
1505        if (!tmp) {
1506                res = ERR_PTR(-ENOMEM);
1507                goto out_iput;
1508        }
1509
1510        spin_lock(&inode->i_lock);
1511        res = __d_find_any_alias(inode);
1512        if (res) {
1513                spin_unlock(&inode->i_lock);
1514                dput(tmp);
1515                goto out_iput;
1516        }
1517
1518        /* attach a disconnected dentry */
1519        spin_lock(&tmp->d_lock);
1520        tmp->d_inode = inode;
1521        tmp->d_flags |= DCACHE_DISCONNECTED;
1522        list_add(&tmp->d_alias, &inode->i_dentry);
1523        hlist_bl_lock(&tmp->d_sb->s_anon);
1524        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1525        hlist_bl_unlock(&tmp->d_sb->s_anon);
1526        spin_unlock(&tmp->d_lock);
1527        spin_unlock(&inode->i_lock);
1528        security_d_instantiate(tmp, inode);
1529
1530        return tmp;
1531
1532 out_iput:
1533        if (res && !IS_ERR(res))
1534                security_d_instantiate(res, inode);
1535        iput(inode);
1536        return res;
1537}
1538EXPORT_SYMBOL(d_obtain_alias);
1539
1540/**
1541 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1542 * @inode:  the inode which may have a disconnected dentry
1543 * @dentry: a negative dentry which we want to point to the inode.
1544 *
1545 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1546 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1547 * and return it, else simply d_add the inode to the dentry and return NULL.
1548 *
1549 * This is needed in the lookup routine of any filesystem that is exportable
1550 * (via knfsd) so that we can build dcache paths to directories effectively.
1551 *
1552 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1553 * is returned.  This matches the expected return value of ->lookup.
1554 *
1555 */
1556struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1557{
1558        struct dentry *new = NULL;
1559
1560        if (IS_ERR(inode))
1561                return ERR_CAST(inode);
1562
1563        if (inode && S_ISDIR(inode->i_mode)) {
1564                spin_lock(&inode->i_lock);
1565                new = __d_find_alias(inode, 1);
1566                if (new) {
1567                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1568                        spin_unlock(&inode->i_lock);
1569                        security_d_instantiate(new, inode);
1570                        d_move(new, dentry);
1571                        iput(inode);
1572                } else {
1573                        /* already taking inode->i_lock, so d_add() by hand */
1574                        __d_instantiate(dentry, inode);
1575                        spin_unlock(&inode->i_lock);
1576                        security_d_instantiate(dentry, inode);
1577                        d_rehash(dentry);
1578                }
1579        } else
1580                d_add(dentry, inode);
1581        return new;
1582}
1583EXPORT_SYMBOL(d_splice_alias);
1584
1585/**
1586 * d_add_ci - lookup or allocate new dentry with case-exact name
1587 * @inode:  the inode case-insensitive lookup has found
1588 * @dentry: the negative dentry that was passed to the parent's lookup func
1589 * @name:   the case-exact name to be associated with the returned dentry
1590 *
1591 * This is to avoid filling the dcache with case-insensitive names to the
1592 * same inode, only the actual correct case is stored in the dcache for
1593 * case-insensitive filesystems.
1594 *
1595 * For a case-insensitive lookup match and if the the case-exact dentry
1596 * already exists in in the dcache, use it and return it.
1597 *
1598 * If no entry exists with the exact case name, allocate new dentry with
1599 * the exact case, and return the spliced entry.
1600 */
1601struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1602                        struct qstr *name)
1603{
1604        int error;
1605        struct dentry *found;
1606        struct dentry *new;
1607
1608        /*
1609         * First check if a dentry matching the name already exists,
1610         * if not go ahead and create it now.
1611         */
1612        found = d_hash_and_lookup(dentry->d_parent, name);
1613        if (!found) {
1614                new = d_alloc(dentry->d_parent, name);
1615                if (!new) {
1616                        error = -ENOMEM;
1617                        goto err_out;
1618                }
1619
1620                found = d_splice_alias(inode, new);
1621                if (found) {
1622                        dput(new);
1623                        return found;
1624                }
1625                return new;
1626        }
1627
1628        /*
1629         * If a matching dentry exists, and it's not negative use it.
1630         *
1631         * Decrement the reference count to balance the iget() done
1632         * earlier on.
1633         */
1634        if (found->d_inode) {
1635                if (unlikely(found->d_inode != inode)) {
1636                        /* This can't happen because bad inodes are unhashed. */
1637                        BUG_ON(!is_bad_inode(inode));
1638                        BUG_ON(!is_bad_inode(found->d_inode));
1639                }
1640                iput(inode);
1641                return found;
1642        }
1643
1644        /*
1645         * We are going to instantiate this dentry, unhash it and clear the
1646         * lookup flag so we can do that.
1647         */
1648        if (unlikely(d_need_lookup(found)))
1649                d_clear_need_lookup(found);
1650
1651        /*
1652         * Negative dentry: instantiate it unless the inode is a directory and
1653         * already has a dentry.
1654         */
1655        new = d_splice_alias(inode, found);
1656        if (new) {
1657                dput(found);
1658                found = new;
1659        }
1660        return found;
1661
1662err_out:
1663        iput(inode);
1664        return ERR_PTR(error);
1665}
1666EXPORT_SYMBOL(d_add_ci);
1667
1668/**
1669 * __d_lookup_rcu - search for a dentry (racy, store-free)
1670 * @parent: parent dentry
1671 * @name: qstr of name we wish to find
1672 * @seq: returns d_seq value at the point where the dentry was found
1673 * @inode: returns dentry->d_inode when the inode was found valid.
1674 * Returns: dentry, or NULL
1675 *
1676 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1677 * resolution (store-free path walking) design described in
1678 * Documentation/filesystems/path-lookup.txt.
1679 *
1680 * This is not to be used outside core vfs.
1681 *
1682 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1683 * held, and rcu_read_lock held. The returned dentry must not be stored into
1684 * without taking d_lock and checking d_seq sequence count against @seq
1685 * returned here.
1686 *
1687 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1688 * function.
1689 *
1690 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1691 * the returned dentry, so long as its parent's seqlock is checked after the
1692 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1693 * is formed, giving integrity down the path walk.
1694 */
1695struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1696                                unsigned *seq, struct inode **inode)
1697{
1698        unsigned int len = name->len;
1699        unsigned int hash = name->hash;
1700        const unsigned char *str = name->name;
1701        struct hlist_bl_head *b = d_hash(parent, hash);
1702        struct hlist_bl_node *node;
1703        struct dentry *dentry;
1704
1705        /*
1706         * Note: There is significant duplication with __d_lookup_rcu which is
1707         * required to prevent single threaded performance regressions
1708         * especially on architectures where smp_rmb (in seqcounts) are costly.
1709         * Keep the two functions in sync.
1710         */
1711
1712        /*
1713         * The hash list is protected using RCU.
1714         *
1715         * Carefully use d_seq when comparing a candidate dentry, to avoid
1716         * races with d_move().
1717         *
1718         * It is possible that concurrent renames can mess up our list
1719         * walk here and result in missing our dentry, resulting in the
1720         * false-negative result. d_lookup() protects against concurrent
1721         * renames using rename_lock seqlock.
1722         *
1723         * See Documentation/filesystems/path-lookup.txt for more details.
1724         */
1725        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1726                struct inode *i;
1727                const char *tname;
1728                int tlen;
1729
1730                if (dentry->d_name.hash != hash)
1731                        continue;
1732
1733seqretry:
1734                *seq = read_seqcount_begin(&dentry->d_seq);
1735                if (dentry->d_parent != parent)
1736                        continue;
1737                if (d_unhashed(dentry))
1738                        continue;
1739                tlen = dentry->d_name.len;
1740                tname = dentry->d_name.name;
1741                i = dentry->d_inode;
1742                prefetch(tname);
1743                /*
1744                 * This seqcount check is required to ensure name and
1745                 * len are loaded atomically, so as not to walk off the
1746                 * edge of memory when walking. If we could load this
1747                 * atomically some other way, we could drop this check.
1748                 */
1749                if (read_seqcount_retry(&dentry->d_seq, *seq))
1750                        goto seqretry;
1751                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1752                        if (parent->d_op->d_compare(parent, *inode,
1753                                                dentry, i,
1754                                                tlen, tname, name))
1755                                continue;
1756                } else {
1757                        if (dentry_cmp(tname, tlen, str, len))
1758                                continue;
1759                }
1760                /*
1761                 * No extra seqcount check is required after the name
1762                 * compare. The caller must perform a seqcount check in
1763                 * order to do anything useful with the returned dentry
1764                 * anyway.
1765                 */
1766                *inode = i;
1767                return dentry;
1768        }
1769        return NULL;
1770}
1771
1772/**
1773 * d_lookup - search for a dentry
1774 * @parent: parent dentry
1775 * @name: qstr of name we wish to find
1776 * Returns: dentry, or NULL
1777 *
1778 * d_lookup searches the children of the parent dentry for the name in
1779 * question. If the dentry is found its reference count is incremented and the
1780 * dentry is returned. The caller must use dput to free the entry when it has
1781 * finished using it. %NULL is returned if the dentry does not exist.
1782 */
1783struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1784{
1785        struct dentry *dentry;
1786        unsigned seq;
1787
1788        do {
1789                seq = read_seqbegin(&rename_lock);
1790                dentry = __d_lookup(parent, name);
1791                if (dentry)
1792                        break;
1793        } while (read_seqretry(&rename_lock, seq));
1794        return dentry;
1795}
1796EXPORT_SYMBOL(d_lookup);
1797
1798/**
1799 * __d_lookup - search for a dentry (racy)
1800 * @parent: parent dentry
1801 * @name: qstr of name we wish to find
1802 * Returns: dentry, or NULL
1803 *
1804 * __d_lookup is like d_lookup, however it may (rarely) return a
1805 * false-negative result due to unrelated rename activity.
1806 *
1807 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1808 * however it must be used carefully, eg. with a following d_lookup in
1809 * the case of failure.
1810 *
1811 * __d_lookup callers must be commented.
1812 */
1813struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1814{
1815        unsigned int len = name->len;
1816        unsigned int hash = name->hash;
1817        const unsigned char *str = name->name;
1818        struct hlist_bl_head *b = d_hash(parent, hash);
1819        struct hlist_bl_node *node;
1820        struct dentry *found = NULL;
1821        struct dentry *dentry;
1822
1823        /*
1824         * Note: There is significant duplication with __d_lookup_rcu which is
1825         * required to prevent single threaded performance regressions
1826         * especially on architectures where smp_rmb (in seqcounts) are costly.
1827         * Keep the two functions in sync.
1828         */
1829
1830        /*
1831         * The hash list is protected using RCU.
1832         *
1833         * Take d_lock when comparing a candidate dentry, to avoid races
1834         * with d_move().
1835         *
1836         * It is possible that concurrent renames can mess up our list
1837         * walk here and result in missing our dentry, resulting in the
1838         * false-negative result. d_lookup() protects against concurrent
1839         * renames using rename_lock seqlock.
1840         *
1841         * See Documentation/filesystems/path-lookup.txt for more details.
1842         */
1843        rcu_read_lock();
1844        
1845        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1846                const char *tname;
1847                int tlen;
1848
1849                if (dentry->d_name.hash != hash)
1850                        continue;
1851
1852                spin_lock(&dentry->d_lock);
1853                if (dentry->d_parent != parent)
1854                        goto next;
1855                if (d_unhashed(dentry))
1856                        goto next;
1857
1858                /*
1859                 * It is safe to compare names since d_move() cannot
1860                 * change the qstr (protected by d_lock).
1861                 */
1862                tlen = dentry->d_name.len;
1863                tname = dentry->d_name.name;
1864                if (parent->d_flags & DCACHE_OP_COMPARE) {
1865                        if (parent->d_op->d_compare(parent, parent->d_inode,
1866                                                dentry, dentry->d_inode,
1867                                                tlen, tname, name))
1868                                goto next;
1869                } else {
1870                        if (dentry_cmp(tname, tlen, str, len))
1871                                goto next;
1872                }
1873
1874                dentry->d_count++;
1875                found = dentry;
1876                spin_unlock(&dentry->d_lock);
1877                break;
1878next:
1879                spin_unlock(&dentry->d_lock);
1880        }
1881        rcu_read_unlock();
1882
1883        return found;
1884}
1885
1886/**
1887 * d_hash_and_lookup - hash the qstr then search for a dentry
1888 * @dir: Directory to search in
1889 * @name: qstr of name we wish to find
1890 *
1891 * On hash failure or on lookup failure NULL is returned.
1892 */
1893struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1894{
1895        struct dentry *dentry = NULL;
1896
1897        /*
1898         * Check for a fs-specific hash function. Note that we must
1899         * calculate the standard hash first, as the d_op->d_hash()
1900         * routine may choose to leave the hash value unchanged.
1901         */
1902        name->hash = full_name_hash(name->name, name->len);
1903        if (dir->d_flags & DCACHE_OP_HASH) {
1904                if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1905                        goto out;
1906        }
1907        dentry = d_lookup(dir, name);
1908out:
1909        return dentry;
1910}
1911
1912/**
1913 * d_validate - verify dentry provided from insecure source (deprecated)
1914 * @dentry: The dentry alleged to be valid child of @dparent
1915 * @dparent: The parent dentry (known to be valid)
1916 *
1917 * An insecure source has sent us a dentry, here we verify it and dget() it.
1918 * This is used by ncpfs in its readdir implementation.
1919 * Zero is returned in the dentry is invalid.
1920 *
1921 * This function is slow for big directories, and deprecated, do not use it.
1922 */
1923int d_validate(struct dentry *dentry, struct dentry *dparent)
1924{
1925        struct dentry *child;
1926
1927        spin_lock(&dparent->d_lock);
1928        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1929                if (dentry == child) {
1930                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1931                        __dget_dlock(dentry);
1932                        spin_unlock(&dentry->d_lock);
1933                        spin_unlock(&dparent->d_lock);
1934                        return 1;
1935                }
1936        }
1937        spin_unlock(&dparent->d_lock);
1938
1939        return 0;
1940}
1941EXPORT_SYMBOL(d_validate);
1942
1943/*
1944 * When a file is deleted, we have two options:
1945 * - turn this dentry into a negative dentry
1946 * - unhash this dentry and free it.
1947 *
1948 * Usually, we want to just turn this into
1949 * a negative dentry, but if anybody else is
1950 * currently using the dentry or the inode
1951 * we can't do that and we fall back on removing
1952 * it from the hash queues and waiting for
1953 * it to be deleted later when it has no users
1954 */
1955 
1956/**
1957 * d_delete - delete a dentry
1958 * @dentry: The dentry to delete
1959 *
1960 * Turn the dentry into a negative dentry if possible, otherwise
1961 * remove it from the hash queues so it can be deleted later
1962 */
1963 
1964void d_delete(struct dentry * dentry)
1965{
1966        struct inode *inode;
1967        int isdir = 0;
1968        /*
1969         * Are we the only user?
1970         */
1971again:
1972        spin_lock(&dentry->d_lock);
1973        inode = dentry->d_inode;
1974        isdir = S_ISDIR(inode->i_mode);
1975        if (dentry->d_count == 1) {
1976                if (inode && !spin_trylock(&inode->i_lock)) {
1977                        spin_unlock(&dentry->d_lock);
1978                        cpu_relax();
1979                        goto again;
1980                }
1981                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1982                dentry_unlink_inode(dentry);
1983                fsnotify_nameremove(dentry, isdir);
1984                return;
1985        }
1986
1987        if (!d_unhashed(dentry))
1988                __d_drop(dentry);
1989
1990        spin_unlock(&dentry->d_lock);
1991
1992        fsnotify_nameremove(dentry, isdir);
1993}
1994EXPORT_SYMBOL(d_delete);
1995
1996static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
1997{
1998        BUG_ON(!d_unhashed(entry));
1999        hlist_bl_lock(b);
2000        entry->d_flags |= DCACHE_RCUACCESS;
2001        hlist_bl_add_head_rcu(&entry->d_hash, b);
2002        hlist_bl_unlock(b);
2003}
2004
2005static void _d_rehash(struct dentry * entry)
2006{
2007        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2008}
2009
2010/**
2011 * d_rehash     - add an entry back to the hash
2012 * @entry: dentry to add to the hash
2013 *
2014 * Adds a dentry to the hash according to its name.
2015 */
2016 
2017void d_rehash(struct dentry * entry)
2018{
2019        spin_lock(&entry->d_lock);
2020        _d_rehash(entry);
2021        spin_unlock(&entry->d_lock);
2022}
2023EXPORT_SYMBOL(d_rehash);
2024
2025/**
2026 * dentry_update_name_case - update case insensitive dentry with a new name
2027 * @dentry: dentry to be updated
2028 * @name: new name
2029 *
2030 * Update a case insensitive dentry with new case of name.
2031 *
2032 * dentry must have been returned by d_lookup with name @name. Old and new
2033 * name lengths must match (ie. no d_compare which allows mismatched name
2034 * lengths).
2035 *
2036 * Parent inode i_mutex must be held over d_lookup and into this call (to
2037 * keep renames and concurrent inserts, and readdir(2) away).
2038 */
2039void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2040{
2041        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2042        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2043
2044        spin_lock(&dentry->d_lock);
2045        write_seqcount_begin(&dentry->d_seq);
2046        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2047        write_seqcount_end(&dentry->d_seq);
2048        spin_unlock(&dentry->d_lock);
2049}
2050EXPORT_SYMBOL(dentry_update_name_case);
2051
2052static void switch_names(struct dentry *dentry, struct dentry *target)
2053{
2054        if (dname_external(target)) {
2055                if (dname_external(dentry)) {
2056                        /*
2057                         * Both external: swap the pointers
2058                         */
2059                        swap(target->d_name.name, dentry->d_name.name);
2060                } else {
2061                        /*
2062                         * dentry:internal, target:external.  Steal target's
2063                         * storage and make target internal.
2064                         */
2065                        memcpy(target->d_iname, dentry->d_name.name,
2066                                        dentry->d_name.len + 1);
2067                        dentry->d_name.name = target->d_name.name;
2068                        target->d_name.name = target->d_iname;
2069                }
2070        } else {
2071                if (dname_external(dentry)) {
2072                        /*
2073                         * dentry:external, target:internal.  Give dentry's
2074                         * storage to target and make dentry internal
2075                         */
2076                        memcpy(dentry->d_iname, target->d_name.name,
2077                                        target->d_name.len + 1);
2078                        target->d_name.name = dentry->d_name.name;
2079                        dentry->d_name.name = dentry->d_iname;
2080                } else {
2081                        /*
2082                         * Both are internal.  Just copy target to dentry
2083                         */
2084                        memcpy(dentry->d_iname, target->d_name.name,
2085                                        target->d_name.len + 1);
2086                        dentry->d_name.len = target->d_name.len;
2087                        return;
2088                }
2089        }
2090        swap(dentry->d_name.len, target->d_name.len);
2091}
2092
2093static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2094{
2095        /*
2096         * XXXX: do we really need to take target->d_lock?
2097         */
2098        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2099                spin_lock(&target->d_parent->d_lock);
2100        else {
2101                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2102                        spin_lock(&dentry->d_parent->d_lock);
2103                        spin_lock_nested(&target->d_parent->d_lock,
2104                                                DENTRY_D_LOCK_NESTED);
2105                } else {
2106                        spin_lock(&target->d_parent->d_lock);
2107                        spin_lock_nested(&dentry->d_parent->d_lock,
2108                                                DENTRY_D_LOCK_NESTED);
2109                }
2110        }
2111        if (target < dentry) {
2112                spin_lock_nested(&target->d_lock, 2);
2113                spin_lock_nested(&dentry->d_lock, 3);
2114        } else {
2115                spin_lock_nested(&dentry->d_lock, 2);
2116                spin_lock_nested(&target->d_lock, 3);
2117        }
2118}
2119
2120static void dentry_unlock_parents_for_move(struct dentry *dentry,
2121                                        struct dentry *target)
2122{
2123        if (target->d_parent != dentry->d_parent)
2124                spin_unlock(&dentry->d_parent->d_lock);
2125        if (target->d_parent != target)
2126                spin_unlock(&target->d_parent->d_lock);
2127}
2128
2129/*
2130 * When switching names, the actual string doesn't strictly have to
2131 * be preserved in the target - because we're dropping the target
2132 * anyway. As such, we can just do a simple memcpy() to copy over
2133 * the new name before we switch.
2134 *
2135 * Note that we have to be a lot more careful about getting the hash
2136 * switched - we have to switch the hash value properly even if it
2137 * then no longer matches the actual (corrupted) string of the target.
2138 * The hash value has to match the hash queue that the dentry is on..
2139 */
2140/*
2141 * __d_move - move a dentry
2142 * @dentry: entry to move
2143 * @target: new dentry
2144 *
2145 * Update the dcache to reflect the move of a file name. Negative
2146 * dcache entries should not be moved in this way. Caller must hold
2147 * rename_lock, the i_mutex of the source and target directories,
2148 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2149 */
2150static void __d_move(struct dentry * dentry, struct dentry * target)
2151{
2152        if (!dentry->d_inode)
2153                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2154
2155        BUG_ON(d_ancestor(dentry, target));
2156        BUG_ON(d_ancestor(target, dentry));
2157
2158        dentry_lock_for_move(dentry, target);
2159
2160        write_seqcount_begin(&dentry->d_seq);
2161        write_seqcount_begin(&target->d_seq);
2162
2163        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2164
2165        /*
2166         * Move the dentry to the target hash queue. Don't bother checking
2167         * for the same hash queue because of how unlikely it is.
2168         */
2169        __d_drop(dentry);
2170        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2171
2172        /* Unhash the target: dput() will then get rid of it */
2173        __d_drop(target);
2174
2175        list_del(&dentry->d_u.d_child);
2176        list_del(&target->d_u.d_child);
2177
2178        /* Switch the names.. */
2179        switch_names(dentry, target);
2180        swap(dentry->d_name.hash, target->d_name.hash);
2181
2182        /* ... and switch the parents */
2183        if (IS_ROOT(dentry)) {
2184                dentry->d_parent = target->d_parent;
2185                target->d_parent = target;
2186                INIT_LIST_HEAD(&target->d_u.d_child);
2187        } else {
2188                swap(dentry->d_parent, target->d_parent);
2189
2190                /* And add them back to the (new) parent lists */
2191                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2192        }
2193
2194        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2195
2196        write_seqcount_end(&target->d_seq);
2197        write_seqcount_end(&dentry->d_seq);
2198
2199        dentry_unlock_parents_for_move(dentry, target);
2200        spin_unlock(&target->d_lock);
2201        fsnotify_d_move(dentry);
2202        spin_unlock(&dentry->d_lock);
2203}
2204
2205/*
2206 * d_move - move a dentry
2207 * @dentry: entry to move
2208 * @target: new dentry
2209 *
2210 * Update the dcache to reflect the move of a file name. Negative
2211 * dcache entries should not be moved in this way. See the locking
2212 * requirements for __d_move.
2213 */
2214void d_move(struct dentry *dentry, struct dentry *target)
2215{
2216        write_seqlock(&rename_lock);
2217        __d_move(dentry, target);
2218        write_sequnlock(&rename_lock);
2219}
2220EXPORT_SYMBOL(d_move);
2221
2222/**
2223 * d_ancestor - search for an ancestor
2224 * @p1: ancestor dentry
2225 * @p2: child dentry
2226 *
2227 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2228 * an ancestor of p2, else NULL.
2229 */
2230struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2231{
2232        struct dentry *p;
2233
2234        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2235                if (p->d_parent == p1)
2236                        return p;
2237        }
2238        return NULL;
2239}
2240
2241/*
2242 * This helper attempts to cope with remotely renamed directories
2243 *
2244 * It assumes that the caller is already holding
2245 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2246 *
2247 * Note: If ever the locking in lock_rename() changes, then please
2248 * remember to update this too...
2249 */
2250static struct dentry *__d_unalias(struct inode *inode,
2251                struct dentry *dentry, struct dentry *alias)
2252{
2253        struct mutex *m1 = NULL, *m2 = NULL;
2254        struct dentry *ret;
2255
2256        /* If alias and dentry share a parent, then no extra locks required */
2257        if (alias->d_parent == dentry->d_parent)
2258                goto out_unalias;
2259
2260        /* See lock_rename() */
2261        ret = ERR_PTR(-EBUSY);
2262        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2263                goto out_err;
2264        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2265        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2266                goto out_err;
2267        m2 = &alias->d_parent->d_inode->i_mutex;
2268out_unalias:
2269        __d_move(alias, dentry);
2270        ret = alias;
2271out_err:
2272        spin_unlock(&inode->i_lock);
2273        if (m2)
2274                mutex_unlock(m2);
2275        if (m1)
2276                mutex_unlock(m1);
2277        return ret;
2278}
2279
2280/*
2281 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2282 * named dentry in place of the dentry to be replaced.
2283 * returns with anon->d_lock held!
2284 */
2285static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2286{
2287        struct dentry *dparent, *aparent;
2288
2289        dentry_lock_for_move(anon, dentry);
2290
2291        write_seqcount_begin(&dentry->d_seq);
2292        write_seqcount_begin(&anon->d_seq);
2293
2294        dparent = dentry->d_parent;
2295        aparent = anon->d_parent;
2296
2297        switch_names(dentry, anon);
2298        swap(dentry->d_name.hash, anon->d_name.hash);
2299
2300        dentry->d_parent = (aparent == anon) ? dentry : aparent;
2301        list_del(&dentry->d_u.d_child);
2302        if (!IS_ROOT(dentry))
2303                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2304        else
2305                INIT_LIST_HEAD(&dentry->d_u.d_child);
2306
2307        anon->d_parent = (dparent == dentry) ? anon : dparent;
2308        list_del(&anon->d_u.d_child);
2309        if (!IS_ROOT(anon))
2310                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2311        else
2312                INIT_LIST_HEAD(&anon->d_u.d_child);
2313
2314        write_seqcount_end(&dentry->d_seq);
2315        write_seqcount_end(&anon->d_seq);
2316
2317        dentry_unlock_parents_for_move(anon, dentry);
2318        spin_unlock(&dentry->d_lock);
2319
2320        /* anon->d_lock still locked, returns locked */
2321        anon->d_flags &= ~DCACHE_DISCONNECTED;
2322}
2323
2324/**
2325 * d_materialise_unique - introduce an inode into the tree
2326 * @dentry: candidate dentry
2327 * @inode: inode to bind to the dentry, to which aliases may be attached
2328 *
2329 * Introduces an dentry into the tree, substituting an extant disconnected
2330 * root directory alias in its place if there is one. Caller must hold the
2331 * i_mutex of the parent directory.
2332 */
2333struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2334{
2335        struct dentry *actual;
2336
2337        BUG_ON(!d_unhashed(dentry));
2338
2339        if (!inode) {
2340                actual = dentry;
2341                __d_instantiate(dentry, NULL);
2342                d_rehash(actual);
2343                goto out_nolock;
2344        }
2345
2346        spin_lock(&inode->i_lock);
2347
2348        if (S_ISDIR(inode->i_mode)) {
2349                struct dentry *alias;
2350
2351                /* Does an aliased dentry already exist? */
2352                alias = __d_find_alias(inode, 0);
2353                if (alias) {
2354                        actual = alias;
2355                        write_seqlock(&rename_lock);
2356
2357                        if (d_ancestor(alias, dentry)) {
2358                                /* Check for loops */
2359                                actual = ERR_PTR(-ELOOP);
2360                        } else if (IS_ROOT(alias)) {
2361                                /* Is this an anonymous mountpoint that we
2362                                 * could splice into our tree? */
2363                                __d_materialise_dentry(dentry, alias);
2364                                write_sequnlock(&rename_lock);
2365                                __d_drop(alias);
2366                                goto found;
2367                        } else {
2368                                /* Nope, but we must(!) avoid directory
2369                                 * aliasing */
2370                                actual = __d_unalias(inode, dentry, alias);
2371                        }
2372                        write_sequnlock(&rename_lock);
2373                        if (IS_ERR(actual)) {
2374                                if (PTR_ERR(actual) == -ELOOP)
2375                                        pr_warn_ratelimited(
2376                                                "VFS: Lookup of '%s' in %s %s"
2377                                                " would have caused loop\n",
2378                                                dentry->d_name.name,
2379                                                inode->i_sb->s_type->name,
2380                                                inode->i_sb->s_id);
2381                                dput(alias);
2382                        }
2383                        goto out_nolock;
2384                }
2385        }
2386
2387        /* Add a unique reference */
2388        actual = __d_instantiate_unique(dentry, inode);
2389        if (!actual)
2390                actual = dentry;
2391        else
2392                BUG_ON(!d_unhashed(actual));
2393
2394        spin_lock(&actual->d_lock);
2395found:
2396        _d_rehash(actual);
2397        spin_unlock(&actual->d_lock);
2398        spin_unlock(&inode->i_lock);
2399out_nolock:
2400        if (actual == dentry) {
2401                security_d_instantiate(dentry, inode);
2402                return NULL;
2403        }
2404
2405        iput(inode);
2406        return actual;
2407}
2408EXPORT_SYMBOL_GPL(d_materialise_unique);
2409
2410static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2411{
2412        *buflen -= namelen;
2413        if (*buflen < 0)
2414                return -ENAMETOOLONG;
2415        *buffer -= namelen;
2416        memcpy(*buffer, str, namelen);
2417        return 0;
2418}
2419
2420static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2421{
2422        return prepend(buffer, buflen, name->name, name->len);
2423}
2424
2425/**
2426 * prepend_path - Prepend path string to a buffer
2427 * @path: the dentry/vfsmount to report
2428 * @root: root vfsmnt/dentry
2429 * @buffer: pointer to the end of the buffer
2430 * @buflen: pointer to buffer length
2431 *
2432 * Caller holds the rename_lock.
2433 */
2434static int prepend_path(const struct path *path,
2435                        const struct path *root,
2436                        char **buffer, int *buflen)
2437{
2438        struct dentry *dentry = path->dentry;
2439        struct vfsmount *vfsmnt = path->mnt;
2440        bool slash = false;
2441        int error = 0;
2442
2443        br_read_lock(vfsmount_lock);
2444        while (dentry != root->dentry || vfsmnt != root->mnt) {
2445                struct dentry * parent;
2446
2447                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2448                        /* Global root? */
2449                        if (vfsmnt->mnt_parent == vfsmnt) {
2450                                goto global_root;
2451                        }
2452                        dentry = vfsmnt->mnt_mountpoint;
2453                        vfsmnt = vfsmnt->mnt_parent;
2454                        continue;
2455                }
2456                parent = dentry->d_parent;
2457                prefetch(parent);
2458                spin_lock(&dentry->d_lock);
2459                error = prepend_name(buffer, buflen, &dentry->d_name);
2460                spin_unlock(&dentry->d_lock);
2461                if (!error)
2462                        error = prepend(buffer, buflen, "/", 1);
2463                if (error)
2464                        break;
2465
2466                slash = true;
2467                dentry = parent;
2468        }
2469
2470        if (!error && !slash)
2471                error = prepend(buffer, buflen, "/", 1);
2472
2473out:
2474        br_read_unlock(vfsmount_lock);
2475        return error;
2476
2477global_root:
2478        /*
2479         * Filesystems needing to implement special "root names"
2480         * should do so with ->d_dname()
2481         */
2482        if (IS_ROOT(dentry) &&
2483            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2484                WARN(1, "Root dentry has weird name <%.*s>\n",
2485                     (int) dentry->d_name.len, dentry->d_name.name);
2486        }
2487        if (!slash)
2488                error = prepend(buffer, buflen, "/", 1);
2489        if (!error)
2490                error = vfsmnt->mnt_ns ? 1 : 2;
2491        goto out;
2492}
2493
2494/**
2495 * __d_path - return the path of a dentry
2496 * @path: the dentry/vfsmount to report
2497 * @root: root vfsmnt/dentry
2498 * @buf: buffer to return value in
2499 * @buflen: buffer length
2500 *
2501 * Convert a dentry into an ASCII path name.
2502 *
2503 * Returns a pointer into the buffer or an error code if the
2504 * path was too long.
2505 *
2506 * "buflen" should be positive.
2507 *
2508 * If the path is not reachable from the supplied root, return %NULL.
2509 */
2510char *__d_path(const struct path *path,
2511               const struct path *root,
2512               char *buf, int buflen)
2513{
2514        char *res = buf + buflen;
2515        int error;
2516
2517        prepend(&res, &buflen, "\0", 1);
2518        write_seqlock(&rename_lock);
2519        error = prepend_path(path, root, &res, &buflen);
2520        write_sequnlock(&rename_lock);
2521
2522        if (error < 0)
2523                return ERR_PTR(error);
2524        if (error > 0)
2525                return NULL;
2526        return res;
2527}
2528
2529char *d_absolute_path(const struct path *path,
2530               char *buf, int buflen)
2531{
2532        struct path root = {};
2533        char *res = buf + buflen;
2534        int error;
2535
2536        prepend(&res, &buflen, "\0", 1);
2537        write_seqlock(&rename_lock);
2538        error = prepend_path(path, &root, &res, &buflen);
2539        write_sequnlock(&rename_lock);
2540
2541        if (error > 1)
2542                error = -EINVAL;
2543        if (error < 0)
2544                return ERR_PTR(error);
2545        return res;
2546}
2547
2548/*
2549 * same as __d_path but appends "(deleted)" for unlinked files.
2550 */
2551static int path_with_deleted(const struct path *path,
2552                             const struct path *root,
2553                             char **buf, int *buflen)
2554{
2555        prepend(buf, buflen, "\0", 1);
2556        if (d_unlinked(path->dentry)) {
2557                int error = prepend(buf, buflen, " (deleted)", 10);
2558                if (error)
2559                        return error;
2560        }
2561
2562        return prepend_path(path, root, buf, buflen);
2563}
2564
2565static int prepend_unreachable(char **buffer, int *buflen)
2566{
2567        return prepend(buffer, buflen, "(unreachable)", 13);
2568}
2569
2570/**
2571 * d_path - return the path of a dentry
2572 * @path: path to report
2573 * @buf: buffer to return value in
2574 * @buflen: buffer length
2575 *
2576 * Convert a dentry into an ASCII path name. If the entry has been deleted
2577 * the string " (deleted)" is appended. Note that this is ambiguous.
2578 *
2579 * Returns a pointer into the buffer or an error code if the path was
2580 * too long. Note: Callers should use the returned pointer, not the passed
2581 * in buffer, to use the name! The implementation often starts at an offset
2582 * into the buffer, and may leave 0 bytes at the start.
2583 *
2584 * "buflen" should be positive.
2585 */
2586char *d_path(const struct path *path, char *buf, int buflen)
2587{
2588        char *res = buf + buflen;
2589        struct path root;
2590        int error;
2591
2592        /*
2593         * We have various synthetic filesystems that never get mounted.  On
2594         * these filesystems dentries are never used for lookup purposes, and
2595         * thus don't need to be hashed.  They also don't need a name until a
2596         * user wants to identify the object in /proc/pid/fd/.  The little hack
2597         * below allows us to generate a name for these objects on demand:
2598         */
2599        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2600                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2601
2602        get_fs_root(current->fs, &root);
2603        write_seqlock(&rename_lock);
2604        error = path_with_deleted(path, &root, &res, &buflen);
2605        if (error < 0)
2606                res = ERR_PTR(error);
2607        write_sequnlock(&rename_lock);
2608        path_put(&root);
2609        return res;
2610}
2611EXPORT_SYMBOL(d_path);
2612
2613/**
2614 * d_path_with_unreachable - return the path of a dentry
2615 * @path: path to report
2616 * @buf: buffer to return value in
2617 * @buflen: buffer length
2618 *
2619 * The difference from d_path() is that this prepends "(unreachable)"
2620 * to paths which are unreachable from the current process' root.
2621 */
2622char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2623{
2624        char *res = buf + buflen;
2625        struct path root;
2626        int error;
2627
2628        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2629                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2630
2631        get_fs_root(current->fs, &root);
2632        write_seqlock(&rename_lock);
2633        error = path_with_deleted(path, &root, &res, &buflen);
2634        if (error > 0)
2635                error = prepend_unreachable(&res, &buflen);
2636        write_sequnlock(&rename_lock);
2637        path_put(&root);
2638        if (error)
2639                res =  ERR_PTR(error);
2640
2641        return res;
2642}
2643
2644/*
2645 * Helper function for dentry_operations.d_dname() members
2646 */
2647char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2648                        const char *fmt, ...)
2649{
2650        va_list args;
2651        char temp[64];
2652        int sz;
2653
2654        va_start(args, fmt);
2655        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2656        va_end(args);
2657
2658        if (sz > sizeof(temp) || sz > buflen)
2659                return ERR_PTR(-ENAMETOOLONG);
2660
2661        buffer += buflen - sz;
2662        return memcpy(buffer, temp, sz);
2663}
2664
2665/*
2666 * Write full pathname from the root of the filesystem into the buffer.
2667 */
2668static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2669{
2670        char *end = buf + buflen;
2671        char *retval;
2672
2673        prepend(&end, &buflen, "\0", 1);
2674        if (buflen < 1)
2675                goto Elong;
2676        /* Get '/' right */
2677        retval = end-1;
2678        *retval = '/';
2679
2680        while (!IS_ROOT(dentry)) {
2681                struct dentry *parent = dentry->d_parent;
2682                int error;
2683
2684                prefetch(parent);
2685                spin_lock(&dentry->d_lock);
2686                error = prepend_name(&end, &buflen, &dentry->d_name);
2687                spin_unlock(&dentry->d_lock);
2688                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2689                        goto Elong;
2690
2691                retval = end;
2692                dentry = parent;
2693        }
2694        return retval;
2695Elong:
2696        return ERR_PTR(-ENAMETOOLONG);
2697}
2698
2699char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2700{
2701        char *retval;
2702
2703        write_seqlock(&rename_lock);
2704        retval = __dentry_path(dentry, buf, buflen);
2705        write_sequnlock(&rename_lock);
2706
2707        return retval;
2708}
2709EXPORT_SYMBOL(dentry_path_raw);
2710
2711char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2712{
2713        char *p = NULL;
2714        char *retval;
2715
2716        write_seqlock(&rename_lock);
2717        if (d_unlinked(dentry)) {
2718                p = buf + buflen;
2719                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2720                        goto Elong;
2721                buflen++;
2722        }
2723        retval = __dentry_path(dentry, buf, buflen);
2724        write_sequnlock(&rename_lock);
2725        if (!IS_ERR(retval) && p)
2726                *p = '/';       /* restore '/' overriden with '\0' */
2727        return retval;
2728Elong:
2729        return ERR_PTR(-ENAMETOOLONG);
2730}
2731
2732/*
2733 * NOTE! The user-level library version returns a
2734 * character pointer. The kernel system call just
2735 * returns the length of the buffer filled (which
2736 * includes the ending '\0' character), or a negative
2737 * error value. So libc would do something like
2738 *
2739 *      char *getcwd(char * buf, size_t size)
2740 *      {
2741 *              int retval;
2742 *
2743 *              retval = sys_getcwd(buf, size);
2744 *              if (retval >= 0)
2745 *                      return buf;
2746 *              errno = -retval;
2747 *              return NULL;
2748 *      }
2749 */
2750SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2751{
2752        int error;
2753        struct path pwd, root;
2754        char *page = (char *) __get_free_page(GFP_USER);
2755
2756        if (!page)
2757                return -ENOMEM;
2758
2759        get_fs_root_and_pwd(current->fs, &root, &pwd);
2760
2761        error = -ENOENT;
2762        write_seqlock(&rename_lock);
2763        if (!d_unlinked(pwd.dentry)) {
2764                unsigned long len;
2765                char *cwd = page + PAGE_SIZE;
2766                int buflen = PAGE_SIZE;
2767
2768                prepend(&cwd, &buflen, "\0", 1);
2769                error = prepend_path(&pwd, &root, &cwd, &buflen);
2770                write_sequnlock(&rename_lock);
2771
2772                if (error < 0)
2773                        goto out;
2774
2775                /* Unreachable from current root */
2776                if (error > 0) {
2777                        error = prepend_unreachable(&cwd, &buflen);
2778                        if (error)
2779                                goto out;
2780                }
2781
2782                error = -ERANGE;
2783                len = PAGE_SIZE + page - cwd;
2784                if (len <= size) {
2785                        error = len;
2786                        if (copy_to_user(buf, cwd, len))
2787                                error = -EFAULT;
2788                }
2789        } else {
2790                write_sequnlock(&rename_lock);
2791        }
2792
2793out:
2794        path_put(&pwd);
2795        path_put(&root);
2796        free_page((unsigned long) page);
2797        return error;
2798}
2799
2800/*
2801 * Test whether new_dentry is a subdirectory of old_dentry.
2802 *
2803 * Trivially implemented using the dcache structure
2804 */
2805
2806/**
2807 * is_subdir - is new dentry a subdirectory of old_dentry
2808 * @new_dentry: new dentry
2809 * @old_dentry: old dentry
2810 *
2811 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2812 * Returns 0 otherwise.
2813 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2814 */
2815  
2816int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2817{
2818        int result;
2819        unsigned seq;
2820
2821        if (new_dentry == old_dentry)
2822                return 1;
2823
2824        do {
2825                /* for restarting inner loop in case of seq retry */
2826                seq = read_seqbegin(&rename_lock);
2827                /*
2828                 * Need rcu_readlock to protect against the d_parent trashing
2829                 * due to d_move
2830                 */
2831                rcu_read_lock();
2832                if (d_ancestor(old_dentry, new_dentry))
2833                        result = 1;
2834                else
2835                        result = 0;
2836                rcu_read_unlock();
2837        } while (read_seqretry(&rename_lock, seq));
2838
2839        return result;
2840}
2841
2842int path_is_under(struct path *path1, struct path *path2)
2843{
2844        struct vfsmount *mnt = path1->mnt;
2845        struct dentry *dentry = path1->dentry;
2846        int res;
2847
2848        br_read_lock(vfsmount_lock);
2849        if (mnt != path2->mnt) {
2850                for (;;) {
2851                        if (mnt->mnt_parent == mnt) {
2852                                br_read_unlock(vfsmount_lock);
2853                                return 0;
2854                        }
2855                        if (mnt->mnt_parent == path2->mnt)
2856                                break;
2857                        mnt = mnt->mnt_parent;
2858                }
2859                dentry = mnt->mnt_mountpoint;
2860        }
2861        res = is_subdir(dentry, path2->dentry);
2862        br_read_unlock(vfsmount_lock);
2863        return res;
2864}
2865EXPORT_SYMBOL(path_is_under);
2866
2867void d_genocide(struct dentry *root)
2868{
2869        struct dentry *this_parent;
2870        struct list_head *next;
2871        unsigned seq;
2872        int locked = 0;
2873
2874        seq = read_seqbegin(&rename_lock);
2875again:
2876        this_parent = root;
2877        spin_lock(&this_parent->d_lock);
2878repeat:
2879        next = this_parent->d_subdirs.next;
2880resume:
2881        while (next != &this_parent->d_subdirs) {
2882                struct list_head *tmp = next;
2883                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2884                next = tmp->next;
2885
2886                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2887                if (d_unhashed(dentry) || !dentry->d_inode) {
2888                        spin_unlock(&dentry->d_lock);
2889                        continue;
2890                }
2891                if (!list_empty(&dentry->d_subdirs)) {
2892                        spin_unlock(&this_parent->d_lock);
2893                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2894                        this_parent = dentry;
2895                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2896                        goto repeat;
2897                }
2898                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2899                        dentry->d_flags |= DCACHE_GENOCIDE;
2900                        dentry->d_count--;
2901                }
2902                spin_unlock(&dentry->d_lock);
2903        }
2904        if (this_parent != root) {
2905                struct dentry *child = this_parent;
2906                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2907                        this_parent->d_flags |= DCACHE_GENOCIDE;
2908                        this_parent->d_count--;
2909                }
2910                this_parent = try_to_ascend(this_parent, locked, seq);
2911                if (!this_parent)
2912                        goto rename_retry;
2913                next = child->d_u.d_child.next;
2914                goto resume;
2915        }
2916        spin_unlock(&this_parent->d_lock);
2917        if (!locked && read_seqretry(&rename_lock, seq))
2918                goto rename_retry;
2919        if (locked)
2920                write_sequnlock(&rename_lock);
2921        return;
2922
2923rename_retry:
2924        locked = 1;
2925        write_seqlock(&rename_lock);
2926        goto again;
2927}
2928
2929/**
2930 * find_inode_number - check for dentry with name
2931 * @dir: directory to check
2932 * @name: Name to find.
2933 *
2934 * Check whether a dentry already exists for the given name,
2935 * and return the inode number if it has an inode. Otherwise
2936 * 0 is returned.
2937 *
2938 * This routine is used to post-process directory listings for
2939 * filesystems using synthetic inode numbers, and is necessary
2940 * to keep getcwd() working.
2941 */
2942 
2943ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2944{
2945        struct dentry * dentry;
2946        ino_t ino = 0;
2947
2948        dentry = d_hash_and_lookup(dir, name);
2949        if (dentry) {
2950                if (dentry->d_inode)
2951                        ino = dentry->d_inode->i_ino;
2952                dput(dentry);
2953        }
2954        return ino;
2955}
2956EXPORT_SYMBOL(find_inode_number);
2957
2958static __initdata unsigned long dhash_entries;
2959static int __init set_dhash_entries(char *str)
2960{
2961        if (!str)
2962                return 0;
2963        dhash_entries = simple_strtoul(str, &str, 0);
2964        return 1;
2965}
2966__setup("dhash_entries=", set_dhash_entries);
2967
2968static void __init dcache_init_early(void)
2969{
2970        int loop;
2971
2972        /* If hashes are distributed across NUMA nodes, defer
2973         * hash allocation until vmalloc space is available.
2974         */
2975        if (hashdist)
2976                return;
2977
2978        dentry_hashtable =
2979                alloc_large_system_hash("Dentry cache",
2980                                        sizeof(struct hlist_bl_head),
2981                                        dhash_entries,
2982                                        13,
2983                                        HASH_EARLY,
2984                                        &d_hash_shift,
2985                                        &d_hash_mask,
2986                                        0);
2987
2988        for (loop = 0; loop < (1 << d_hash_shift); loop++)
2989                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
2990}
2991
2992static void __init dcache_init(void)
2993{
2994        int loop;
2995
2996        /* 
2997         * A constructor could be added for stable state like the lists,
2998         * but it is probably not worth it because of the cache nature
2999         * of the dcache. 
3000         */
3001        dentry_cache = KMEM_CACHE(dentry,
3002                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3003
3004        /* Hash may have been set up in dcache_init_early */
3005        if (!hashdist)
3006                return;
3007
3008        dentry_hashtable =
3009                alloc_large_system_hash("Dentry cache",
3010                                        sizeof(struct hlist_bl_head),
3011                                        dhash_entries,
3012                                        13,
3013                                        0,
3014                                        &d_hash_shift,
3015                                        &d_hash_mask,
3016                                        0);
3017
3018        for (loop = 0; loop < (1 << d_hash_shift); loop++)
3019                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3020}
3021
3022/* SLAB cache for __getname() consumers */
3023struct kmem_cache *names_cachep __read_mostly;
3024EXPORT_SYMBOL(names_cachep);
3025
3026EXPORT_SYMBOL(d_genocide);
3027
3028void __init vfs_caches_init_early(void)
3029{
3030        dcache_init_early();
3031        inode_init_early();
3032}
3033
3034void __init vfs_caches_init(unsigned long mempages)
3035{
3036        unsigned long reserve;
3037
3038        /* Base hash sizes on available memory, with a reserve equal to
3039           150% of current kernel size */
3040
3041        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3042        mempages -= reserve;
3043
3044        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3045                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3046
3047        dcache_init();
3048        inode_init();
3049        files_init(mempages);
3050        mnt_init();
3051        bdev_cache_init();
3052        chrdev_init();
3053}
3054
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.