/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include "internal.h"
  39
/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - childrens' d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
/* Sysctl tunable controlling how aggressively dentries are reclaimed. */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

/* Protects the per-superblock dentry LRU lists and the unused counters. */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
/* Seqlock bumped across renames (writers not visible in this chunk). */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

/* Slab cache that all struct dentry objects are allocated from. */
static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;
 103
 104static inline struct hlist_bl_head *d_hash(struct dentry *parent,
 105                                        unsigned long hash)
 106{
 107        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
 108        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 109        return dentry_hashtable + (hash & D_HASHMASK);
 110}
 111
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

/* Per-cpu count of live dentries; summed on demand by get_nr_dentry(). */
static DEFINE_PER_CPU(unsigned int, nr_dentry);
 119#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 120static int get_nr_dentry(void)
 121{
 122        int i;
 123        int sum = 0;
 124        for_each_possible_cpu(i)
 125                sum += per_cpu(nr_dentry, i);
 126        return sum < 0 ? 0 : sum;
 127}
 128
/*
 * Sysctl handler: refresh dentry_stat.nr_dentry from the per-cpu
 * counters, then let proc_dointvec() handle the actual table access.
 */
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
 135#endif
 136
/*
 * RCU callback that actually frees a dentry: release a separately
 * allocated name (dname_external), then return the dentry to the slab.
 */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	/* every inode alias must have been severed by now */
	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}
 146
/*
 * Final teardown of a zero-refcount dentry: run the filesystem's
 * ->d_release() and free the memory, deferring via RCU if the dentry
 * was ever visible to RCU-walk lookups.  No locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}
 163
/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier: disturbing d_seq forces rcu-walk retries */
	write_seqcount_barrier(&dentry->d_seq);
}
 177
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 *
 * Entered with both d_lock and the inode's i_lock held; both are
 * dropped before the final ->d_iput()/iput() call.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		/* detach from the inode's alias list before unlocking */
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		/* notify watchers if the last link just went away */
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		/* negative dentry: nothing to release */
		spin_unlock(&dentry->d_lock);
	}
}
 203
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 *
 * Unlike dentry_iput(), the dentry may still be hashed, so a
 * dentry_rcuwalk_barrier() is issued to make concurrent rcu-walk
 * lookups that sampled the old d_inode retry.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	/* notify watchers if the last link just went away */
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
 225
 226/*
 227 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 228 */
 229static void dentry_lru_add(struct dentry *dentry)
 230{
 231        if (list_empty(&dentry->d_lru)) {
 232                spin_lock(&dcache_lru_lock);
 233                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 234                dentry->d_sb->s_nr_dentry_unused++;
 235                dentry_stat.nr_unused++;
 236                spin_unlock(&dcache_lru_lock);
 237        }
 238}
 239
/* Unlink from the LRU and fix the counters; dcache_lru_lock held. */
static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}
 246
 247static void dentry_lru_del(struct dentry *dentry)
 248{
 249        if (!list_empty(&dentry->d_lru)) {
 250                spin_lock(&dcache_lru_lock);
 251                __dentry_lru_del(dentry);
 252                spin_unlock(&dcache_lru_lock);
 253        }
 254}
 255
/*
 * Move the dentry to the tail of its superblock's LRU, adding it (and
 * bumping the unused counters) if it was not on the list yet.
 */
static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}
 268
/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	/* unlink from the parent's child list */
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DISCONNECTED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}
 302
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		/* disconnected dentries hang off the sb's s_anon list instead */
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		/* pprev == NULL is the "unhashed" marker d_unhashed() tests */
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);

		/* force concurrent rcu-walk lookups to retry */
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);
 336
/* Lock-taking wrapper around __d_drop(). */
void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);
 344
/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		/*
		 * Trylock failed: we only hold d_lock, and taking i_lock or
		 * the parent's d_lock outright here would invert the
		 * documented lock ordering.  Back off completely and make
		 * the caller retry with the same dentry.
		 */
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}
 382
/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	/* dropping the last ref may free the dentry: flag atomic callers */
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		/* not the last reference: just decrement and go */
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	/* last reference: give the filesystem a chance to veto caching */
	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/*
	 * dentry_kill() returns the parent to release next, or the same
	 * dentry if its trylocks failed — either way, loop.
	 */
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
 448
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		/* can't hold d_lock across shrink_dcache_parent() */
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	if (dentry->d_count > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);
 503
/* Take an extra reference.  This must be called with d_lock held. */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}
 509
/* Take a reference, acquiring and releasing d_lock around the bump. */
static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}
 516
/*
 * dget_parent - acquire a reference on @dentry's parent.
 *
 * d_parent can change under us, so sample it under RCU and re-check
 * it after taking the parent's d_lock; loop until the two agree.
 */
struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	if (!ret) {
		rcu_read_unlock();
		goto out;
	}
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		/* parent changed while we took the lock: retry */
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
out:
	return ret;
}
EXPORT_SYMBOL(dget_parent);
 546
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* remember it, but keep looking for a better one */
				discon_alias = alias;
			} else if (!want_discon) {
				/* normal hashed alias: take a ref and return */
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		/*
		 * Only the disconnected root alias qualified.  Re-check it
		 * under its lock (it may have changed since we dropped the
		 * lock above); if it no longer qualifies, rescan the list.
		 */
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}
 599
 600struct dentry *d_find_alias(struct inode *inode)
 601{
 602        struct dentry *de = NULL;
 603
 604        if (!list_empty(&inode->i_dentry)) {
 605                spin_lock(&inode->i_lock);
 606                de = __d_find_alias(inode, 0);
 607                spin_unlock(&inode->i_lock);
 608        }
 609        return de;
 610}
 611EXPORT_SYMBOL(d_find_alias);
 612
/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			/*
			 * Unused alias: pin it with a ref, unhash it, then
			 * drop the ref outside the locks.  The alias list
			 * may have changed meanwhile, so rescan from the top.
			 */
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
 637
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			/* ancestor still referenced elsewhere: drop ours and stop */
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		dentry = dentry_kill(dentry, 1);
	}
}
 678
/*
 * Kill the dentries on @list (a private LRU snapshot), one at a time.
 * The tail is re-read under rcu_read_lock and re-checked under d_lock
 * so that entries removed or freed by racing threads are tolerated.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			/* the tail changed before we got the lock: retry */
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup.  Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/* pruning can block; leave the RCU read section meanwhile */
		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}
 713
/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:         superblock to shrink dentry LRU.
 * @count:      number of entries to prune; updated with the number left over
 * @flags:      flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);
	int cnt = *count;

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		/*
		 * d_lock nests outside dcache_lru_lock, so only trylock is
		 * safe here; on failure drop everything and start over.
		 */
		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it.  Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
				dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--cnt)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	/* kill the batched candidates outside dcache_lru_lock */
	shrink_dentry_list(&tmp);

	*count = cnt;
}
 769
/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = dentry_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim the same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted.  So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		/* drop the ref we held on the previous superblock */
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
 844
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		/* steal the whole LRU privately, then kill it unlocked */
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
 866
 867/*
 868 * destroy a single subtree of dentries for unmount
 869 * - see the comments on shrink_dcache_for_umount() for a description of the
 870 *   locking
 871 */
 872static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 873{
 874        struct dentry *parent;
 875        unsigned detached = 0;
 876
 877        BUG_ON(!IS_ROOT(dentry));
 878
 879        /* detach this root from the system */
 880        spin_lock(&dentry->d_lock);
 881        dentry_lru_del(dentry);
 882        __d_drop(dentry);
 883        spin_unlock(&dentry->d_lock);
 884
 885        for (;;) {
 886                /* descend to the first leaf in the current subtree */
 887                while (!list_empty(&dentry->d_subdirs)) {
 888                        struct dentry *loop;
 889
 890                        /* this is a branch with children - detach all of them
 891                         * from the system in one go */
 892                        spin_lock(&dentry->d_lock);
 893                        list_for_each_entry(loop, &dentry->d_subdirs,
 894                                            d_u.d_child) {
 895                                spin_lock_nested(&loop->d_lock,
 896                                                DENTRY_D_LOCK_NESTED);
 897                                dentry_lru_del(loop);
 898                                __d_drop(loop);
 899                                spin_unlock(&loop->d_lock);
 900                        }
 901                        spin_unlock(&dentry->d_lock);
 902
 903                        /* move to the first child */
 904                        dentry = list_entry(dentry->d_subdirs.next,
 905                                            struct dentry, d_u.d_child);
 906                }
 907
 908                /* consume the dentries from this leaf up through its parents
 909                 * until we find one with children or run out altogether */
 910                do {
 911                        struct inode *inode;
 912
 913                        if (dentry->d_count != 0) {
 914                                printk(KERN_ERR
 915                                       "BUG: Dentry %p{i=%lx,n=%s}"
 916                                       " still in use (%d)"
 917                                       " [unmount of %s %s]\n",
 918                                       dentry,
 919                                       dentry->d_inode ?
 920                                       dentry->d_inode->i_ino : 0UL,
 921                                       dentry->d_name.name,
 922                                       dentry->d_count,
 923                                       dentry->d_sb->s_type->name,
 924                                       dentry->d_sb->s_id);
 925                                BUG();
 926                        }
 927
 928                        if (IS_ROOT(dentry)) {
 929                                parent = NULL;
 930                                list_del(&dentry->d_u.d_child);
 931                        } else {
 932                                parent = dentry->d_parent;
 933                                spin_lock(&parent->d_lock);
 934                                parent->d_count--;
 935                                list_del(&dentry->d_u.d_child);
 936                                spin_unlock(&parent->d_lock);
 937                        }
 938
 939                        detached++;
 940
 941                        inode = dentry->d_inode;
 942                        if (inode) {
 943                                dentry->d_inode = NULL;
 944                                list_del_init(&dentry->d_alias);
 945                                if (dentry->d_op && dentry->d_op->d_iput)
 946                                        dentry->d_op->d_iput(dentry, inode);
 947                                else
 948                                        iput(inode);
 949                        }
 950
 951                        d_free(dentry);
 952
 953                        /* finished when we fall off the top of the tree,
 954                         * otherwise we ascend to the parent and move to the
 955                         * next sibling if there is one */
 956                        if (!parent)
 957                                return;
 958                        dentry = parent;
 959                } while (list_empty(&dentry->d_subdirs));
 960
 961                dentry = list_entry(dentry->d_subdirs.next,
 962                                    struct dentry, d_u.d_child);
 963        }
 964}
 965
 966/*
 967 * destroy the dentries attached to a superblock on unmounting
 968 * - we don't need to use dentry->d_lock because:
 969 *   - the superblock is detached from all mountings and open files, so the
 970 *     dentry trees will not be rearranged by the VFS
 971 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 972 *     any dentries belonging to this superblock that it comes across
 973 *   - the filesystem itself is no longer permitted to rearrange the dentries
 974 *     in this superblock
 975 */
 976void shrink_dcache_for_umount(struct super_block *sb)
 977{
 978        struct dentry *dentry;
 979
 980        if (down_read_trylock(&sb->s_umount))
 981                BUG();
 982
 983        dentry = sb->s_root;
 984        sb->s_root = NULL;
 985        spin_lock(&dentry->d_lock);
 986        dentry->d_count--;
 987        spin_unlock(&dentry->d_lock);
 988        shrink_dcache_for_umount_subtree(dentry);
 989
 990        while (!hlist_bl_empty(&sb->s_anon)) {
 991                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 992                shrink_dcache_for_umount_subtree(dentry);
 993        }
 994}
 995
 996/*
 997 * This tries to ascend one level of parenthood, but
 998 * we can race with renaming, so we need to re-check
 999 * the parenthood after dropping the lock and check
1000 * that the sequence number still matches.
1001 */
1002static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1003{
1004        struct dentry *new = old->d_parent;
1005
1006        rcu_read_lock();
1007        spin_unlock(&old->d_lock);
1008        spin_lock(&new->d_lock);
1009
1010        /*
1011         * might go back up the wrong parent if we have had a rename
1012         * or deletion
1013         */
1014        if (new != old->d_parent ||
1015                 (old->d_flags & DCACHE_DISCONNECTED) ||
1016                 (!locked && read_seqretry(&rename_lock, seq))) {
1017                spin_unlock(&new->d_lock);
1018                new = NULL;
1019        }
1020        rcu_read_unlock();
1021        return new;
1022}
1023
1024
1025/*
1026 * Search for at least 1 mount point in the dentry's subdirs.
1027 * We descend to the next level whenever the d_subdirs
1028 * list is non-empty and continue searching.
1029 */
1030 
1031/**
1032 * have_submounts - check for mounts over a dentry
1033 * @parent: dentry to check.
1034 *
1035 * Return true if the parent or its subdirectories contain
1036 * a mount point
1037 */
1038int have_submounts(struct dentry *parent)
1039{
1040        struct dentry *this_parent;
1041        struct list_head *next;
1042        unsigned seq;
1043        int locked = 0;
1044
1045        seq = read_seqbegin(&rename_lock);
1046again:
1047        this_parent = parent;
1048
1049        if (d_mountpoint(parent))
1050                goto positive;
1051        spin_lock(&this_parent->d_lock);
1052repeat:
1053        next = this_parent->d_subdirs.next;
1054resume:
1055        while (next != &this_parent->d_subdirs) {
1056                struct list_head *tmp = next;
1057                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1058                next = tmp->next;
1059
1060                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1061                /* Have we found a mount point ? */
1062                if (d_mountpoint(dentry)) {
1063                        spin_unlock(&dentry->d_lock);
1064                        spin_unlock(&this_parent->d_lock);
1065                        goto positive;
1066                }
1067                if (!list_empty(&dentry->d_subdirs)) {
1068                        spin_unlock(&this_parent->d_lock);
1069                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1070                        this_parent = dentry;
1071                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1072                        goto repeat;
1073                }
1074                spin_unlock(&dentry->d_lock);
1075        }
1076        /*
1077         * All done at this level ... ascend and resume the search.
1078         */
1079        if (this_parent != parent) {
1080                struct dentry *child = this_parent;
1081                this_parent = try_to_ascend(this_parent, locked, seq);
1082                if (!this_parent)
1083                        goto rename_retry;
1084                next = child->d_u.d_child.next;
1085                goto resume;
1086        }
1087        spin_unlock(&this_parent->d_lock);
1088        if (!locked && read_seqretry(&rename_lock, seq))
1089                goto rename_retry;
1090        if (locked)
1091                write_sequnlock(&rename_lock);
1092        return 0; /* No mount points found in tree */
1093positive:
1094        if (!locked && read_seqretry(&rename_lock, seq))
1095                goto rename_retry;
1096        if (locked)
1097                write_sequnlock(&rename_lock);
1098        return 1;
1099
1100rename_retry:
1101        locked = 1;
1102        write_seqlock(&rename_lock);
1103        goto again;
1104}
1105EXPORT_SYMBOL(have_submounts);
1106
1107/*
1108 * Search the dentry child list for the specified parent,
1109 * and move any unused dentries to the end of the unused
1110 * list for prune_dcache(). We descend to the next level
1111 * whenever the d_subdirs list is non-empty and continue
1112 * searching.
1113 *
1114 * It returns zero iff there are no unused children,
1115 * otherwise  it returns the number of children moved to
1116 * the end of the unused list. This may not be the total
1117 * number of unused children, because select_parent can
1118 * drop the lock and return early due to latency
1119 * constraints.
1120 */
1121static int select_parent(struct dentry * parent)
1122{
1123        struct dentry *this_parent;
1124        struct list_head *next;
1125        unsigned seq;
1126        int found = 0;
1127        int locked = 0;
1128
1129        seq = read_seqbegin(&rename_lock);
1130again:
1131        this_parent = parent;
1132        spin_lock(&this_parent->d_lock);
1133repeat:
1134        next = this_parent->d_subdirs.next;
1135resume:
1136        while (next != &this_parent->d_subdirs) {
1137                struct list_head *tmp = next;
1138                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1139                next = tmp->next;
1140
1141                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1142
1143                /* 
1144                 * move only zero ref count dentries to the end 
1145                 * of the unused list for prune_dcache
1146                 */
1147                if (!dentry->d_count) {
1148                        dentry_lru_move_tail(dentry);
1149                        found++;
1150                } else {
1151                        dentry_lru_del(dentry);
1152                }
1153
1154                /*
1155                 * We can return to the caller if we have found some (this
1156                 * ensures forward progress). We'll be coming back to find
1157                 * the rest.
1158                 */
1159                if (found && need_resched()) {
1160                        spin_unlock(&dentry->d_lock);
1161                        goto out;
1162                }
1163
1164                /*
1165                 * Descend a level if the d_subdirs list is non-empty.
1166                 */
1167                if (!list_empty(&dentry->d_subdirs)) {
1168                        spin_unlock(&this_parent->d_lock);
1169                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1170                        this_parent = dentry;
1171                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1172                        goto repeat;
1173                }
1174
1175                spin_unlock(&dentry->d_lock);
1176        }
1177        /*
1178         * All done at this level ... ascend and resume the search.
1179         */
1180        if (this_parent != parent) {
1181                struct dentry *child = this_parent;
1182                this_parent = try_to_ascend(this_parent, locked, seq);
1183                if (!this_parent)
1184                        goto rename_retry;
1185                next = child->d_u.d_child.next;
1186                goto resume;
1187        }
1188out:
1189        spin_unlock(&this_parent->d_lock);
1190        if (!locked && read_seqretry(&rename_lock, seq))
1191                goto rename_retry;
1192        if (locked)
1193                write_sequnlock(&rename_lock);
1194        return found;
1195
1196rename_retry:
1197        if (found)
1198                return found;
1199        locked = 1;
1200        write_seqlock(&rename_lock);
1201        goto again;
1202}
1203
1204/**
1205 * shrink_dcache_parent - prune dcache
1206 * @parent: parent of entries to prune
1207 *
1208 * Prune the dcache to remove unused children of the parent dentry.
1209 */
1210 
1211void shrink_dcache_parent(struct dentry * parent)
1212{
1213        struct super_block *sb = parent->d_sb;
1214        int found;
1215
1216        while ((found = select_parent(parent)) != 0)
1217                __shrink_dcache_sb(sb, &found, 0);
1218}
1219EXPORT_SYMBOL(shrink_dcache_parent);
1220
1221/*
1222 * Scan `nr' dentries and return the number which remain.
1223 *
1224 * We need to avoid reentering the filesystem if the caller is performing a
1225 * GFP_NOFS allocation attempt.  One example deadlock is:
1226 *
1227 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1228 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1229 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1230 *
1231 * In this case we return -1 to tell the caller that we baled.
1232 */
1233static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1234{
1235        if (nr) {
1236                if (!(gfp_mask & __GFP_FS))
1237                        return -1;
1238                prune_dcache(nr);
1239        }
1240
1241        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1242}
1243
/* registered with the VM so the dcache shrinks under memory pressure */
static struct shrinker dcache_shrinker = {
        .shrink = shrink_dcache_memory,
        .seeks = DEFAULT_SEEKS,
};
1248
1249/**
1250 * d_alloc      -       allocate a dcache entry
1251 * @parent: parent of entry to allocate
1252 * @name: qstr of the name
1253 *
1254 * Allocates a dentry. It returns %NULL if there is insufficient memory
1255 * available. On a success the dentry is returned. The name passed in is
1256 * copied and the copy passed in may be reused after this call.
1257 */
1258 
1259struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1260{
1261        struct dentry *dentry;
1262        char *dname;
1263
1264        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1265        if (!dentry)
1266                return NULL;
1267
1268        if (name->len > DNAME_INLINE_LEN-1) {
1269                dname = kmalloc(name->len + 1, GFP_KERNEL);
1270                if (!dname) {
1271                        kmem_cache_free(dentry_cache, dentry); 
1272                        return NULL;
1273                }
1274        } else  {
1275                dname = dentry->d_iname;
1276        }       
1277        dentry->d_name.name = dname;
1278
1279        dentry->d_name.len = name->len;
1280        dentry->d_name.hash = name->hash;
1281        memcpy(dname, name->name, name->len);
1282        dname[name->len] = 0;
1283
1284        dentry->d_count = 1;
1285        dentry->d_flags = 0;
1286        spin_lock_init(&dentry->d_lock);
1287        seqcount_init(&dentry->d_seq);
1288        dentry->d_inode = NULL;
1289        dentry->d_parent = NULL;
1290        dentry->d_sb = NULL;
1291        dentry->d_op = NULL;
1292        dentry->d_fsdata = NULL;
1293        INIT_HLIST_BL_NODE(&dentry->d_hash);
1294        INIT_LIST_HEAD(&dentry->d_lru);
1295        INIT_LIST_HEAD(&dentry->d_subdirs);
1296        INIT_LIST_HEAD(&dentry->d_alias);
1297        INIT_LIST_HEAD(&dentry->d_u.d_child);
1298
1299        if (parent) {
1300                spin_lock(&parent->d_lock);
1301                /*
1302                 * don't need child lock because it is not subject
1303                 * to concurrency here
1304                 */
1305                __dget_dlock(parent);
1306                dentry->d_parent = parent;
1307                dentry->d_sb = parent->d_sb;
1308                d_set_d_op(dentry, dentry->d_sb->s_d_op);
1309                list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1310                spin_unlock(&parent->d_lock);
1311        }
1312
1313        this_cpu_inc(nr_dentry);
1314
1315        return dentry;
1316}
1317EXPORT_SYMBOL(d_alloc);
1318
1319struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1320{
1321        struct dentry *dentry = d_alloc(NULL, name);
1322        if (dentry) {
1323                dentry->d_sb = sb;
1324                d_set_d_op(dentry, dentry->d_sb->s_d_op);
1325                dentry->d_parent = dentry;
1326                dentry->d_flags |= DCACHE_DISCONNECTED;
1327        }
1328        return dentry;
1329}
1330EXPORT_SYMBOL(d_alloc_pseudo);
1331
1332struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1333{
1334        struct qstr q;
1335
1336        q.name = name;
1337        q.len = strlen(name);
1338        q.hash = full_name_hash(q.name, q.len);
1339        return d_alloc(parent, &q);
1340}
1341EXPORT_SYMBOL(d_alloc_name);
1342
1343void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1344{
1345        WARN_ON_ONCE(dentry->d_op);
1346        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1347                                DCACHE_OP_COMPARE       |
1348                                DCACHE_OP_REVALIDATE    |
1349                                DCACHE_OP_DELETE ));
1350        dentry->d_op = op;
1351        if (!op)
1352                return;
1353        if (op->d_hash)
1354                dentry->d_flags |= DCACHE_OP_HASH;
1355        if (op->d_compare)
1356                dentry->d_flags |= DCACHE_OP_COMPARE;
1357        if (op->d_revalidate)
1358                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1359        if (op->d_delete)
1360                dentry->d_flags |= DCACHE_OP_DELETE;
1361
1362}
1363EXPORT_SYMBOL(d_set_d_op);
1364
/* the caller must hold inode->i_lock when @inode is non-NULL */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
        spin_lock(&dentry->d_lock);
        if (inode) {
                if (unlikely(IS_AUTOMOUNT(inode)))
                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
                list_add(&dentry->d_alias, &inode->i_dentry);
        }
        dentry->d_inode = inode;
        /* publish the new d_inode to lockless walkers (see the barrier) */
        dentry_rcuwalk_barrier(dentry);
        spin_unlock(&dentry->d_lock);
        fsnotify_d_instantiate(dentry, inode);
}
1378
1379/**
1380 * d_instantiate - fill in inode information for a dentry
1381 * @entry: dentry to complete
1382 * @inode: inode to attach to this dentry
1383 *
1384 * Fill in inode information in the entry.
1385 *
1386 * This turns negative dentries into productive full members
1387 * of society.
1388 *
1389 * NOTE! This assumes that the inode count has been incremented
1390 * (or otherwise set) by the caller to indicate that it is now
1391 * in use by the dcache.
1392 */
1393 
1394void d_instantiate(struct dentry *entry, struct inode * inode)
1395{
1396        BUG_ON(!list_empty(&entry->d_alias));
1397        if (inode)
1398                spin_lock(&inode->i_lock);
1399        __d_instantiate(entry, inode);
1400        if (inode)
1401                spin_unlock(&inode->i_lock);
1402        security_d_instantiate(entry, inode);
1403}
1404EXPORT_SYMBOL(d_instantiate);
1405
1406/**
1407 * d_instantiate_unique - instantiate a non-aliased dentry
1408 * @entry: dentry to instantiate
1409 * @inode: inode to attach to this dentry
1410 *
1411 * Fill in inode information in the entry. On success, it returns NULL.
1412 * If an unhashed alias of "entry" already exists, then we return the
1413 * aliased dentry instead and drop one reference to inode.
1414 *
1415 * Note that in order to avoid conflicts with rename() etc, the caller
1416 * had better be holding the parent directory semaphore.
1417 *
1418 * This also assumes that the inode count has been incremented
1419 * (or otherwise set) by the caller to indicate that it is now
1420 * in use by the dcache.
1421 */
1422static struct dentry *__d_instantiate_unique(struct dentry *entry,
1423                                             struct inode *inode)
1424{
1425        struct dentry *alias;
1426        int len = entry->d_name.len;
1427        const char *name = entry->d_name.name;
1428        unsigned int hash = entry->d_name.hash;
1429
1430        if (!inode) {
1431                __d_instantiate(entry, NULL);
1432                return NULL;
1433        }
1434
1435        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1436                struct qstr *qstr = &alias->d_name;
1437
1438                /*
1439                 * Don't need alias->d_lock here, because aliases with
1440                 * d_parent == entry->d_parent are not subject to name or
1441                 * parent changes, because the parent inode i_mutex is held.
1442                 */
1443                if (qstr->hash != hash)
1444                        continue;
1445                if (alias->d_parent != entry->d_parent)
1446                        continue;
1447                if (dentry_cmp(qstr->name, qstr->len, name, len))
1448                        continue;
1449                __dget(alias);
1450                return alias;
1451        }
1452
1453        __d_instantiate(entry, inode);
1454        return NULL;
1455}
1456
/* see __d_instantiate_unique() above for the full contract */
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
        struct dentry *result;

        /* an already-instantiated dentry must not be instantiated again */
        BUG_ON(!list_empty(&entry->d_alias));

        if (inode)
                spin_lock(&inode->i_lock);
        result = __d_instantiate_unique(entry, inode);
        if (inode)
                spin_unlock(&inode->i_lock);

        if (!result) {
                security_d_instantiate(entry, inode);
                return NULL;
        }

        /*
         * An alias was found and took over: drop the inode reference the
         * caller passed in, since the alias already holds its own.
         */
        BUG_ON(!d_unhashed(result));
        iput(inode);
        return result;
}

EXPORT_SYMBOL(d_instantiate_unique);
1480
1481/**
1482 * d_alloc_root - allocate root dentry
1483 * @root_inode: inode to allocate the root for
1484 *
1485 * Allocate a root ("/") dentry for the inode given. The inode is
1486 * instantiated and returned. %NULL is returned if there is insufficient
1487 * memory or the inode passed is %NULL.
1488 */
1489 
1490struct dentry * d_alloc_root(struct inode * root_inode)
1491{
1492        struct dentry *res = NULL;
1493
1494        if (root_inode) {
1495                static const struct qstr name = { .name = "/", .len = 1 };
1496
1497                res = d_alloc(NULL, &name);
1498                if (res) {
1499                        res->d_sb = root_inode->i_sb;
1500                        d_set_d_op(res, res->d_sb->s_d_op);
1501                        res->d_parent = res;
1502                        d_instantiate(res, root_inode);
1503                }
1504        }
1505        return res;
1506}
1507EXPORT_SYMBOL(d_alloc_root);
1508
1509static struct dentry * __d_find_any_alias(struct inode *inode)
1510{
1511        struct dentry *alias;
1512
1513        if (list_empty(&inode->i_dentry))
1514                return NULL;
1515        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1516        __dget(alias);
1517        return alias;
1518}
1519
1520static struct dentry * d_find_any_alias(struct inode *inode)
1521{
1522        struct dentry *de;
1523
1524        spin_lock(&inode->i_lock);
1525        de = __d_find_any_alias(inode);
1526        spin_unlock(&inode->i_lock);
1527        return de;
1528}
1529
1530
1531/**
1532 * d_obtain_alias - find or allocate a dentry for a given inode
1533 * @inode: inode to allocate the dentry for
1534 *
1535 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1536 * similar open by handle operations.  The returned dentry may be anonymous,
1537 * or may have a full name (if the inode was already in the cache).
1538 *
1539 * When called on a directory inode, we must ensure that the inode only ever
1540 * has one dentry.  If a dentry is found, that is returned instead of
1541 * allocating a new one.
1542 *
1543 * On successful return, the reference to the inode has been transferred
1544 * to the dentry.  In case of an error the reference on the inode is released.
1545 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1546 * be passed in and will be the error will be propagate to the return value,
1547 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1548 */
1549struct dentry *d_obtain_alias(struct inode *inode)
1550{
1551        static const struct qstr anonstring = { .name = "" };
1552        struct dentry *tmp;
1553        struct dentry *res;
1554
1555        if (!inode)
1556                return ERR_PTR(-ESTALE);
1557        if (IS_ERR(inode))
1558                return ERR_CAST(inode);
1559
1560        res = d_find_any_alias(inode);
1561        if (res)
1562                goto out_iput;
1563
1564        tmp = d_alloc(NULL, &anonstring);
1565        if (!tmp) {
1566                res = ERR_PTR(-ENOMEM);
1567                goto out_iput;
1568        }
1569        tmp->d_parent = tmp; /* make sure dput doesn't croak */
1570
1571
1572        spin_lock(&inode->i_lock);
1573        res = __d_find_any_alias(inode);
1574        if (res) {
1575                spin_unlock(&inode->i_lock);
1576                dput(tmp);
1577                goto out_iput;
1578        }
1579
1580        /* attach a disconnected dentry */
1581        spin_lock(&tmp->d_lock);
1582        tmp->d_sb = inode->i_sb;
1583        d_set_d_op(tmp, tmp->d_sb->s_d_op);
1584        tmp->d_inode = inode;
1585        tmp->d_flags |= DCACHE_DISCONNECTED;
1586        list_add(&tmp->d_alias, &inode->i_dentry);
1587        hlist_bl_lock(&tmp->d_sb->s_anon);
1588        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1589        hlist_bl_unlock(&tmp->d_sb->s_anon);
1590        spin_unlock(&tmp->d_lock);
1591        spin_unlock(&inode->i_lock);
1592        security_d_instantiate(tmp, inode);
1593
1594        return tmp;
1595
1596 out_iput:
1597        if (res && !IS_ERR(res))
1598                security_d_instantiate(res, inode);
1599        iput(inode);
1600        return res;
1601}
1602EXPORT_SYMBOL(d_obtain_alias);
1603
1604/**
1605 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1606 * @inode:  the inode which may have a disconnected dentry
1607 * @dentry: a negative dentry which we want to point to the inode.
1608 *
1609 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1610 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1611 * and return it, else simply d_add the inode to the dentry and return NULL.
1612 *
1613 * This is needed in the lookup routine of any filesystem that is exportable
1614 * (via knfsd) so that we can build dcache paths to directories effectively.
1615 *
1616 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1617 * is returned.  This matches the expected return value of ->lookup.
1618 *
1619 */
1620struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1621{
1622        struct dentry *new = NULL;
1623
1624        if (inode && S_ISDIR(inode->i_mode)) {
1625                spin_lock(&inode->i_lock);
1626                new = __d_find_alias(inode, 1);
1627                if (new) {
1628                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1629                        spin_unlock(&inode->i_lock);
1630                        security_d_instantiate(new, inode);
1631                        d_move(new, dentry);
1632                        iput(inode);
1633                } else {
1634                        /* already taking inode->i_lock, so d_add() by hand */
1635                        __d_instantiate(dentry, inode);
1636                        spin_unlock(&inode->i_lock);
1637                        security_d_instantiate(dentry, inode);
1638                        d_rehash(dentry);
1639                }
1640        } else
1641                d_add(dentry, inode);
1642        return new;
1643}
1644EXPORT_SYMBOL(d_splice_alias);
1645
1646/**
1647 * d_add_ci - lookup or allocate new dentry with case-exact name
1648 * @inode:  the inode case-insensitive lookup has found
1649 * @dentry: the negative dentry that was passed to the parent's lookup func
1650 * @name:   the case-exact name to be associated with the returned dentry
1651 *
1652 * This is to avoid filling the dcache with case-insensitive names to the
1653 * same inode, only the actual correct case is stored in the dcache for
1654 * case-insensitive filesystems.
1655 *
1656 * For a case-insensitive lookup match and if the the case-exact dentry
1657 * already exists in in the dcache, use it and return it.
1658 *
1659 * If no entry exists with the exact case name, allocate new dentry with
1660 * the exact case, and return the spliced entry.
1661 */
1662struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1663                        struct qstr *name)
1664{
1665        int error;
1666        struct dentry *found;
1667        struct dentry *new;
1668
1669        /*
1670         * First check if a dentry matching the name already exists,
1671         * if not go ahead and create it now.
1672         */
1673        found = d_hash_and_lookup(dentry->d_parent, name);
1674        if (!found) {
1675                new = d_alloc(dentry->d_parent, name);
1676                if (!new) {
1677                        error = -ENOMEM;
1678                        goto err_out;
1679                }
1680
1681                found = d_splice_alias(inode, new);
1682                if (found) {
1683                        dput(new);
1684                        return found;
1685                }
1686                return new;
1687        }
1688
1689        /*
1690         * If a matching dentry exists, and it's not negative use it.
1691         *
1692         * Decrement the reference count to balance the iget() done
1693         * earlier on.
1694         */
1695        if (found->d_inode) {
1696                if (unlikely(found->d_inode != inode)) {
1697                        /* This can't happen because bad inodes are unhashed. */
1698                        BUG_ON(!is_bad_inode(inode));
1699                        BUG_ON(!is_bad_inode(found->d_inode));
1700                }
1701                iput(inode);
1702                return found;
1703        }
1704
1705        /*
1706         * Negative dentry: instantiate it unless the inode is a directory and
1707         * already has a dentry.
1708         */
1709        spin_lock(&inode->i_lock);
1710        if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1711                __d_instantiate(found, inode);
1712                spin_unlock(&inode->i_lock);
1713                security_d_instantiate(found, inode);
1714                return found;
1715        }
1716
1717        /*
1718         * In case a directory already has a (disconnected) entry grab a
1719         * reference to it, move it in place and use it.
1720         */
1721        new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1722        __dget(new);
1723        spin_unlock(&inode->i_lock);
1724        security_d_instantiate(found, inode);
1725        d_move(new, found);
1726        iput(inode);
1727        dput(found);
1728        return new;
1729
1730err_out:
1731        iput(inode);
1732        return ERR_PTR(error);
1733}
1734EXPORT_SYMBOL(d_add_ci);
1735
/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seq: returns d_seq value at the point where the dentry was found
 * @inode: returns dentry->d_inode when the inode was found valid.
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 * held, and rcu_read_lock held. The returned dentry must not be stored into
 * without taking d_lock and checking d_seq sequence count against @seq
 * returned here.
 *
 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
				unsigned *seq, struct inode **inode)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
		struct inode *i;
		const char *tname;
		int tlen;

		/* Cheap reject before touching the seqcount. */
		if (dentry->d_name.hash != hash)
			continue;

seqretry:
		/*
		 * Snapshot d_seq before loading the fields d_move() can
		 * change (parent, hashed state, name, inode). A parent or
		 * unhashed mismatch simply skips the entry; if the mismatch
		 * was caused by a concurrent rename, the rename_lock retry
		 * in d_lookup() (or the caller's seq re-check) covers it.
		 */
		*seq = read_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		i = dentry->d_inode;
		prefetch(tname);
		if (i)
			prefetch(i);
		/*
		 * This seqcount check is required to ensure name and
		 * len are loaded atomically, so as not to walk off the
		 * edge of memory when walking. If we could load this
		 * atomically some other way, we could drop this check.
		 */
		if (read_seqcount_retry(&dentry->d_seq, *seq))
			goto seqretry;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, *inode,
						dentry, i,
						tlen, tname, name))
				continue;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				continue;
		}
		/*
		 * No extra seqcount check is required after the name
		 * compare. The caller must perform a seqcount check in
		 * order to do anything useful with the returned dentry
		 * anyway.
		 */
		*inode = i;
		return dentry;
	}
	return NULL;
}
1841
1842/**
1843 * d_lookup - search for a dentry
1844 * @parent: parent dentry
1845 * @name: qstr of name we wish to find
1846 * Returns: dentry, or NULL
1847 *
1848 * d_lookup searches the children of the parent dentry for the name in
1849 * question. If the dentry is found its reference count is incremented and the
1850 * dentry is returned. The caller must use dput to free the entry when it has
1851 * finished using it. %NULL is returned if the dentry does not exist.
1852 */
1853struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1854{
1855        struct dentry *dentry;
1856        unsigned seq;
1857
1858        do {
1859                seq = read_seqbegin(&rename_lock);
1860                dentry = __d_lookup(parent, name);
1861                if (dentry)
1862                        break;
1863        } while (read_seqretry(&rename_lock, seq));
1864        return dentry;
1865}
1866EXPORT_SYMBOL(d_lookup);
1867
/**
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * __d_lookup callers must be commented.
 */
struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();
	
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
		const char *tname;
		int tlen;

		/* Cheap reject before taking d_lock. */
		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		/* Re-check under d_lock: d_move() may have relocated it. */
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						tlen, tname, name))
				goto next;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				goto next;
		}

		/*
		 * The reference is taken while d_lock is held, so the
		 * dentry stays pinned for the caller.
		 */
		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}
1955
1956/**
1957 * d_hash_and_lookup - hash the qstr then search for a dentry
1958 * @dir: Directory to search in
1959 * @name: qstr of name we wish to find
1960 *
1961 * On hash failure or on lookup failure NULL is returned.
1962 */
1963struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1964{
1965        struct dentry *dentry = NULL;
1966
1967        /*
1968         * Check for a fs-specific hash function. Note that we must
1969         * calculate the standard hash first, as the d_op->d_hash()
1970         * routine may choose to leave the hash value unchanged.
1971         */
1972        name->hash = full_name_hash(name->name, name->len);
1973        if (dir->d_flags & DCACHE_OP_HASH) {
1974                if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1975                        goto out;
1976        }
1977        dentry = d_lookup(dir, name);
1978out:
1979        return dentry;
1980}
1981
1982/**
1983 * d_validate - verify dentry provided from insecure source (deprecated)
1984 * @dentry: The dentry alleged to be valid child of @dparent
1985 * @dparent: The parent dentry (known to be valid)
1986 *
1987 * An insecure source has sent us a dentry, here we verify it and dget() it.
1988 * This is used by ncpfs in its readdir implementation.
1989 * Zero is returned in the dentry is invalid.
1990 *
1991 * This function is slow for big directories, and deprecated, do not use it.
1992 */
1993int d_validate(struct dentry *dentry, struct dentry *dparent)
1994{
1995        struct dentry *child;
1996
1997        spin_lock(&dparent->d_lock);
1998        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1999                if (dentry == child) {
2000                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2001                        __dget_dlock(dentry);
2002                        spin_unlock(&dentry->d_lock);
2003                        spin_unlock(&dparent->d_lock);
2004                        return 1;
2005                }
2006        }
2007        spin_unlock(&dparent->d_lock);
2008
2009        return 0;
2010}
2011EXPORT_SYMBOL(d_validate);
2012
/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */
 
/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */
 
void d_delete(struct dentry * dentry)
{
	struct inode *inode;
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
again:
	spin_lock(&dentry->d_lock);
	inode = dentry->d_inode;
	/*
	 * NOTE(review): inode is dereferenced here before the NULL test
	 * below - presumably d_delete() is only ever called on positive
	 * dentries, making the later "inode &&" check belt-and-braces;
	 * confirm against callers.
	 */
	isdir = S_ISDIR(inode->i_mode);
	if (dentry->d_count == 1) {
		/*
		 * Lock ordering is i_lock before d_lock (see the comment
		 * at the top of this file), but d_lock is already held -
		 * so trylock i_lock and back off and retry on contention
		 * instead of deadlocking.
		 */
		if (inode && !spin_trylock(&inode->i_lock)) {
			spin_unlock(&dentry->d_lock);
			cpu_relax();
			goto again;
		}
		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
		/*
		 * Must release both d_lock and i_lock internally - we
		 * return below without unlocking either.
		 */
		dentry_unlink_inode(dentry);
		fsnotify_nameremove(dentry, isdir);
		return;
	}

	/* Still in use: just make it unreachable for new lookups. */
	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);

	fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
2065
/*
 * Add @entry to hash chain @b. Callers hold entry->d_lock; the chain
 * itself is serialized by its bit-spinlock (hlist_bl_lock).
 */
static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
	BUG_ON(!d_unhashed(entry));
	hlist_bl_lock(b);
	/*
	 * Mark the dentry as (potentially) reachable via RCU-walked hash
	 * chains before publishing it with the _rcu list primitive -
	 * presumably the flag gates RCU-deferred freeing on teardown;
	 * confirm against the dput/free path.
	 */
	entry->d_flags |= DCACHE_RCUACCESS;
	hlist_bl_add_head_rcu(&entry->d_hash, b);
	hlist_bl_unlock(b);
}
2074
/*
 * Hash @entry onto the chain derived from its parent and current name
 * hash. Caller holds entry->d_lock (see d_rehash()).
 */
static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}
2079
/**
 * d_rehash     - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name. Takes and releases
 * @entry's d_lock around the insertion.
 */
 
void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);
2094
/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with new case of name.
 *
 * dentry must have been returned by d_lookup with name @name. Old and new
 * name lengths must match (ie. no d_compare which allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

	spin_lock(&dentry->d_lock);
	/*
	 * Bump d_seq around the copy so rcu-walk readers of the name
	 * (see __d_lookup_rcu) retry rather than observe a half-written
	 * name.
	 */
	write_seqcount_begin(&dentry->d_seq);
	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
2121
2122static void switch_names(struct dentry *dentry, struct dentry *target)
2123{
2124        if (dname_external(target)) {
2125                if (dname_external(dentry)) {
2126                        /*
2127                         * Both external: swap the pointers
2128                         */
2129                        swap(target->d_name.name, dentry->d_name.name);
2130                } else {
2131                        /*
2132                         * dentry:internal, target:external.  Steal target's
2133                         * storage and make target internal.
2134                         */
2135                        memcpy(target->d_iname, dentry->d_name.name,
2136                                        dentry->d_name.len + 1);
2137                        dentry->d_name.name = target->d_name.name;
2138                        target->d_name.name = target->d_iname;
2139                }
2140        } else {
2141                if (dname_external(dentry)) {
2142                        /*
2143                         * dentry:external, target:internal.  Give dentry's
2144                         * storage to target and make dentry internal
2145                         */
2146                        memcpy(dentry->d_iname, target->d_name.name,
2147                                        target->d_name.len + 1);
2148                        target->d_name.name = dentry->d_name.name;
2149                        dentry->d_name.name = dentry->d_iname;
2150                } else {
2151                        /*
2152                         * Both are internal.  Just copy target to dentry
2153                         */
2154                        memcpy(dentry->d_iname, target->d_name.name,
2155                                        target->d_name.len + 1);
2156                        dentry->d_name.len = target->d_name.len;
2157                        return;
2158                }
2159        }
2160        swap(dentry->d_name.len, target->d_name.len);
2161}
2162
/*
 * Take the d_locks needed to move @dentry to @target: both parents'
 * locks (when they are distinct) followed by both dentries' locks, in
 * a deadlock-free order.
 *
 * Parents are ordered by ancestry - per the ordering rules at the top
 * of this file, an ancestor's d_lock is taken before a descendant's.
 * The two dentries themselves are ordered by address. Nesting levels 2
 * and 3 are used for the children because levels 0 and
 * DENTRY_D_LOCK_NESTED (1) are already occupied by the parent locks.
 */
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
		spin_lock(&target->d_parent->d_lock);
	else {
		if (d_ancestor(dentry->d_parent, target->d_parent)) {
			/* dentry's parent is the ancestor: lock it first */
			spin_lock(&dentry->d_parent->d_lock);
			spin_lock_nested(&target->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		} else {
			spin_lock(&target->d_parent->d_lock);
			spin_lock_nested(&dentry->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		}
	}
	if (target < dentry) {
		spin_lock_nested(&target->d_lock, 2);
		spin_lock_nested(&dentry->d_lock, 3);
	} else {
		spin_lock_nested(&dentry->d_lock, 2);
		spin_lock_nested(&target->d_lock, 3);
	}
}
2189
/*
 * Drop the parent locks taken by dentry_lock_for_move(). dentry's
 * parent lock is only dropped when it differs from target's (otherwise
 * only one parent lock was taken), and target's parent lock is skipped
 * when target is its own parent (IS_ROOT), since then the "parent"
 * lock is target->d_lock itself, which the caller releases separately.
 */
static void dentry_unlock_parents_for_move(struct dentry *dentry,
					struct dentry *target)
{
	if (target->d_parent != dentry->d_parent)
		spin_unlock(&dentry->d_parent->d_lock);
	if (target->d_parent != target)
		spin_unlock(&target->d_parent->d_lock);
}
2198
/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
/*
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
void d_move(struct dentry * dentry, struct dentry * target)
{
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	/* Moving a dentry into its own subtree would corrupt the tree. */
	BUG_ON(d_ancestor(dentry, target));
	BUG_ON(d_ancestor(target, dentry));

	/*
	 * Hold rename_lock as writer: lockless lookups (d_lookup and the
	 * path-building functions) read this seqlock and retry if a
	 * rename runs concurrently.
	 */
	write_seqlock(&rename_lock);

	dentry_lock_for_move(dentry, target);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&target->d_seq);

	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */

	/*
	 * Move the dentry to the target hash queue. Don't bother checking
	 * for the same hash queue because of how unlikely it is.
	 */
	__d_drop(dentry);
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		/* target becomes a self-parented root outside any tree */
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);

	dentry_unlock_parents_for_move(dentry, target);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);
2276
2277/**
2278 * d_ancestor - search for an ancestor
2279 * @p1: ancestor dentry
2280 * @p2: child dentry
2281 *
2282 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2283 * an ancestor of p2, else NULL.
2284 */
2285struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2286{
2287        struct dentry *p;
2288
2289        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2290                if (p->d_parent == p1)
2291                        return p;
2292        }
2293        return NULL;
2294}
2295
/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 *
 * Returns the moved alias on success, or ERR_PTR(-ELOOP/-EBUSY).
 * inode->i_lock is dropped on all return paths.
 */
static struct dentry *__d_unalias(struct inode *inode,
		struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_ancestor(alias, dentry))
		goto out_err;

	/*
	 * See lock_rename(): take s_vfs_rename_mutex, then the source
	 * parent's i_mutex. Trylock only - with the caller's i_mutex and
	 * i_lock already held, blocking here could deadlock against a
	 * real rename, so bail out with -EBUSY instead.
	 */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&inode->i_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}
2339
/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 * returns with anon->d_lock held!
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent, *aparent;

	dentry_lock_for_move(anon, dentry);

	/* Bump both seqcounts so rcu-walk lookups retry around the swap. */
	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&anon->d_seq);

	dparent = dentry->d_parent;
	aparent = anon->d_parent;

	/* Exchange names and hashes, then exchange the two parents. */
	switch_names(dentry, anon);
	swap(dentry->d_name.hash, anon->d_name.hash);

	/*
	 * If the counterpart was self-parented (IS_ROOT), make the dentry
	 * taking its place self-parented too rather than its own child.
	 */
	dentry->d_parent = (aparent == anon) ? dentry : aparent;
	list_del(&dentry->d_u.d_child);
	if (!IS_ROOT(dentry))
		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&dentry->d_u.d_child);

	anon->d_parent = (dparent == dentry) ? anon : dparent;
	list_del(&anon->d_u.d_child);
	if (!IS_ROOT(anon))
		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&anon->d_u.d_child);

	write_seqcount_end(&dentry->d_seq);
	write_seqcount_end(&anon->d_seq);

	dentry_unlock_parents_for_move(anon, dentry);
	spin_unlock(&dentry->d_lock);

	/* anon->d_lock still locked, returns locked */
	anon->d_flags &= ~DCACHE_DISCONNECTED;
}
2383
/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces an dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one
 *
 * Returns NULL when @dentry itself was used (the caller keeps its
 * references); otherwise returns the alias that was spliced in instead,
 * in which case the passed-in @inode reference is consumed here.
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	if (!inode) {
		/* Negative dentry: just instantiate and hash it as-is. */
		actual = dentry;
		__d_instantiate(dentry, NULL);
		d_rehash(actual);
		goto out_nolock;
	}

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				/* Swap it into dentry's place in the tree. */
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			actual = __d_unalias(inode, dentry, alias);
			/* __d_unalias dropped i_lock for us. */
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else
		BUG_ON(!d_unhashed(actual));

	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&inode->i_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	/* An alias was used instead: drop the inode reference we hold. */
	iput(inode);
	return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
2451
/*
 * Copy @namelen bytes of @str immediately before *buffer, moving the
 * cursor and the remaining length back accordingly. Paths are built
 * backwards from the end of the buffer with this helper.
 *
 * Returns 0 on success, -ENAMETOOLONG when the buffer is exhausted.
 * Note that *buflen is consumed even on failure (so follow-up calls
 * keep failing), while *buffer is left untouched in that case.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	int remaining = *buflen - namelen;

	*buflen = remaining;
	if (remaining < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}
2461
/* Prepend a qstr's name bytes; see prepend() for buffer semantics. */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}
2466
/**
 * prepend_path - Prepend path string to a buffer
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * Caller holds the rename_lock.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
static int prepend_path(const struct path *path, struct path *root,
			char **buffer, int *buflen)
{
	struct dentry *dentry = path->dentry;
	struct vfsmount *vfsmnt = path->mnt;
	/* set once at least one "/<component>" has been emitted */
	bool slash = false;
	int error = 0;

	/* The mount tree is walked under the vfsmount lock. */
	br_read_lock(vfsmount_lock);
	while (dentry != root->dentry || vfsmnt != root->mnt) {
		struct dentry * parent;

		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (vfsmnt->mnt_parent == vfsmnt) {
				goto global_root;
			}
			/* Hop across the mountpoint into the parent mount. */
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		/* d_lock keeps the name stable against d_move(). */
		spin_lock(&dentry->d_lock);
		error = prepend_name(buffer, buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (!error)
			error = prepend(buffer, buflen, "/", 1);
		if (error)
			break;

		slash = true;
		dentry = parent;
	}

out:
	/* A bare root emitted no components: print the lone "/". */
	if (!error && !slash)
		error = prepend(buffer, buflen, "/", 1);

	br_read_unlock(vfsmount_lock);
	return error;

global_root:
	/*
	 * Filesystems needing to implement special "root names"
	 * should do so with ->d_dname()
	 */
	if (IS_ROOT(dentry) &&
	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
		WARN(1, "Root dentry has weird name <%.*s>\n",
		     (int) dentry->d_name.len, dentry->d_name.name);
	}
	/* Report where the walk actually ended, per the kernel-doc above. */
	root->mnt = vfsmnt;
	root->dentry = dentry;
	goto out;
}
2535
/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
char *__d_path(const struct path *path, struct path *root,
	       char *buf, int buflen)
{
	char *res = buf + buflen;
	int error;

	/* Build the string backwards, starting with the terminating NUL. */
	prepend(&res, &buflen, "\0", 1);
	/*
	 * Take rename_lock on the write side so no d_move() can run
	 * while prepend_path() walks the d_parent chain.
	 */
	write_seqlock(&rename_lock);
	error = prepend_path(path, root, &res, &buflen);
	write_sequnlock(&rename_lock);

	if (error)
		return ERR_PTR(error);
	return res;
}
2568
2569/*
2570 * same as __d_path but appends "(deleted)" for unlinked files.
2571 */
2572static int path_with_deleted(const struct path *path, struct path *root,
2573                                 char **buf, int *buflen)
2574{
2575        prepend(buf, buflen, "\0", 1);
2576        if (d_unlinked(path->dentry)) {
2577                int error = prepend(buf, buflen, " (deleted)", 10);
2578                if (error)
2579                        return error;
2580        }
2581
2582        return prepend_path(path, root, buf, buflen);
2583}
2584
/* Prefix for paths that cannot be reached from the given root. */
static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}
2589
2590/**
2591 * d_path - return the path of a dentry
2592 * @path: path to report
2593 * @buf: buffer to return value in
2594 * @buflen: buffer length
2595 *
2596 * Convert a dentry into an ASCII path name. If the entry has been deleted
2597 * the string " (deleted)" is appended. Note that this is ambiguous.
2598 *
2599 * Returns a pointer into the buffer or an error code if the path was
2600 * too long. Note: Callers should use the returned pointer, not the passed
2601 * in buffer, to use the name! The implementation often starts at an offset
2602 * into the buffer, and may leave 0 bytes at the start.
2603 *
2604 * "buflen" should be positive.
2605 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;	/* name is built backwards from the end */
	struct path root;
	struct path tmp;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted.  On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed.  They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/.  The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	/* Pin the caller's root so it cannot go away during the walk. */
	get_fs_root(current->fs, &root);
	/* rename_lock keeps d_parent/d_name stable while we walk up. */
	write_seqlock(&rename_lock);
	tmp = root;	/* path_with_deleted() may modify its root argument */
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (error)
		res = ERR_PTR(error);
	write_sequnlock(&rename_lock);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);
2634
2635/**
2636 * d_path_with_unreachable - return the path of a dentry
2637 * @path: path to report
2638 * @buf: buffer to return value in
2639 * @buflen: buffer length
2640 *
2641 * The difference from d_path() is that this prepends "(unreachable)"
2642 * to paths which are unreachable from the current process' root.
2643 */
2644char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2645{
2646        char *res = buf + buflen;
2647        struct path root;
2648        struct path tmp;
2649        int error;
2650
2651        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2652                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2653
2654        get_fs_root(current->fs, &root);
2655        write_seqlock(&rename_lock);
2656        tmp = root;
2657        error = path_with_deleted(path, &tmp, &res, &buflen);
2658        if (!error && !path_equal(&tmp, &root))
2659                error = prepend_unreachable(&res, &buflen);
2660        write_sequnlock(&rename_lock);
2661        path_put(&root);
2662        if (error)
2663                res =  ERR_PTR(error);
2664
2665        return res;
2666}
2667
2668/*
2669 * Helper function for dentry_operations.d_dname() members
2670 */
2671char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2672                        const char *fmt, ...)
2673{
2674        va_list args;
2675        char temp[64];
2676        int sz;
2677
2678        va_start(args, fmt);
2679        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2680        va_end(args);
2681
2682        if (sz > sizeof(temp) || sz > buflen)
2683                return ERR_PTR(-ENAMETOOLONG);
2684
2685        buffer += buflen - sz;
2686        return memcpy(buffer, temp, sz);
2687}
2688
2689/*
2690 * Write full pathname from the root of the filesystem into the buffer.
2691 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	/* Reserve the trailing NUL first. */
	prepend(&end, &buflen, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	/* Walk up to the filesystem root, prepending "/<name>" each step. */
	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		/* d_lock keeps d_name stable while it is being copied. */
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}
2722
2723char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2724{
2725        char *retval;
2726
2727        write_seqlock(&rename_lock);
2728        retval = __dentry_path(dentry, buf, buflen);
2729        write_sequnlock(&rename_lock);
2730
2731        return retval;
2732}
2733EXPORT_SYMBOL(dentry_path_raw);
2734
2735char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2736{
2737        char *p = NULL;
2738        char *retval;
2739
2740        write_seqlock(&rename_lock);
2741        if (d_unlinked(dentry)) {
2742                p = buf + buflen;
2743                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2744                        goto Elong;
2745                buflen++;
2746        }
2747        retval = __dentry_path(dentry, buf, buflen);
2748        write_sequnlock(&rename_lock);
2749        if (!IS_ERR(retval) && p)
2750                *p = '/';       /* restore '/' overriden with '\0' */
2751        return retval;
2752Elong:
2753        return ERR_PTR(-ENAMETOOLONG);
2754}
2755
2756/*
2757 * NOTE! The user-level library version returns a
2758 * character pointer. The kernel system call just
2759 * returns the length of the buffer filled (which
2760 * includes the ending '\0' character), or a negative
2761 * error value. So libc would do something like
2762 *
2763 *      char *getcwd(char * buf, size_t size)
2764 *      {
2765 *              int retval;
2766 *
2767 *              retval = sys_getcwd(buf, size);
2768 *              if (retval >= 0)
2769 *                      return buf;
2770 *              errno = -retval;
2771 *              return NULL;
2772 *      }
2773 */
SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
{
	int error;
	struct path pwd, root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	/* Grab stable references to this task's root and cwd. */
	get_fs_root_and_pwd(current->fs, &root, &pwd);

	error = -ENOENT;
	write_seqlock(&rename_lock);
	if (!d_unlinked(pwd.dentry)) {
		unsigned long len;
		struct path tmp = root;	/* prepend_path may modify its root */
		char *cwd = page + PAGE_SIZE;	/* path is built backwards */
		int buflen = PAGE_SIZE;

		prepend(&cwd, &buflen, "\0", 1);
		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
		write_sequnlock(&rename_lock);

		if (error)
			goto out;

		/* Unreachable from current root */
		if (!path_equal(&tmp, &root)) {
			error = prepend_unreachable(&cwd, &buflen);
			if (error)
				goto out;
		}

		/* On success the return value is the length, incl. the NUL. */
		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else {
		/* cwd has been unlinked: report -ENOENT. */
		write_sequnlock(&rename_lock);
	}

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}
2824
2825/*
2826 * Test whether new_dentry is a subdirectory of old_dentry.
2827 *
2828 * Trivially implemented using the dcache structure
2829 */
2830
2831/**
2832 * is_subdir - is new dentry a subdirectory of old_dentry
2833 * @new_dentry: new dentry
2834 * @old_dentry: old dentry
2835 *
 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2837 * Returns 0 otherwise.
2838 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2839 */
2840  
2841int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2842{
2843        int result;
2844        unsigned seq;
2845
2846        if (new_dentry == old_dentry)
2847                return 1;
2848
2849        do {
2850                /* for restarting inner loop in case of seq retry */
2851                seq = read_seqbegin(&rename_lock);
2852                /*
2853                 * Need rcu_readlock to protect against the d_parent trashing
2854                 * due to d_move
2855                 */
2856                rcu_read_lock();
2857                if (d_ancestor(old_dentry, new_dentry))
2858                        result = 1;
2859                else
2860                        result = 0;
2861                rcu_read_unlock();
2862        } while (read_seqretry(&rename_lock, seq));
2863
2864        return result;
2865}
2866
int path_is_under(struct path *path1, struct path *path2)
{
	struct vfsmount *mnt = path1->mnt;
	struct dentry *dentry = path1->dentry;
	int res;

	/* Hold vfsmount_lock so the mount tree cannot change under us. */
	br_read_lock(vfsmount_lock);
	if (mnt != path2->mnt) {
		/* Climb the mount tree until we reach path2's mount. */
		for (;;) {
			if (mnt->mnt_parent == mnt) {
				/* Hit the root mount: path2 is not above us. */
				br_read_unlock(vfsmount_lock);
				return 0;
			}
			if (mnt->mnt_parent == path2->mnt)
				break;
			mnt = mnt->mnt_parent;
		}
		/* Compare from where this mount hangs in path2's mount. */
		dentry = mnt->mnt_mountpoint;
	}
	res = is_subdir(dentry, path2->dentry);
	br_read_unlock(vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);
2891
/*
 * Walk the whole subtree under @root and drop one reference from every
 * hashed dentry that has an inode (at most once per dentry, tracked via
 * DCACHE_GENOCIDE).
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;	/* set once we fall back to holding rename_lock */

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = root;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	/* Depth-first walk over all children of this_parent. */
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Skip unhashed and negative dentries. */
		if (d_unhashed(dentry) || !dentry->d_inode) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			/*
			 * Descend: the held child lock becomes the parent
			 * lock of the next level.  spin_release/spin_acquire
			 * is lockdep bookkeeping only; no lock is actually
			 * dropped or re-taken for dentry->d_lock.
			 */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		/* Drop one reference, but only once per dentry. */
		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_count--;
		}
		spin_unlock(&dentry->d_lock);
	}
	if (this_parent != root) {
		/* All children done: drop this directory's ref and ascend. */
		struct dentry *child = this_parent;
		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
			this_parent->d_flags |= DCACHE_GENOCIDE;
			this_parent->d_count--;
		}
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	/* A rename may have raced with the lockless walk: redo it locked. */
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
2953
2954/**
2955 * find_inode_number - check for dentry with name
2956 * @dir: directory to check
2957 * @name: Name to find.
2958 *
2959 * Check whether a dentry already exists for the given name,
2960 * and return the inode number if it has an inode. Otherwise
2961 * 0 is returned.
2962 *
2963 * This routine is used to post-process directory listings for
2964 * filesystems using synthetic inode numbers, and is necessary
2965 * to keep getcwd() working.
2966 */
2967 
2968ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2969{
2970        struct dentry * dentry;
2971        ino_t ino = 0;
2972
2973        dentry = d_hash_and_lookup(dir, name);
2974        if (dentry) {
2975                if (dentry->d_inode)
2976                        ino = dentry->d_inode->i_ino;
2977                dput(dentry);
2978        }
2979        return ino;
2980}
2981EXPORT_SYMBOL(find_inode_number);
2982
2983static __initdata unsigned long dhash_entries;
2984static int __init set_dhash_entries(char *str)
2985{
2986        if (!str)
2987                return 0;
2988        dhash_entries = simple_strtoul(str, &str, 0);
2989        return 1;
2990}
2991__setup("dhash_entries=", set_dhash_entries);
2992
2993static void __init dcache_init_early(void)
2994{
2995        int loop;
2996
2997        /* If hashes are distributed across NUMA nodes, defer
2998         * hash allocation until vmalloc space is available.
2999         */
3000        if (hashdist)
3001                return;
3002
3003        dentry_hashtable =
3004                alloc_large_system_hash("Dentry cache",
3005                                        sizeof(struct hlist_bl_head),
3006                                        dhash_entries,
3007                                        13,
3008                                        HASH_EARLY,
3009                                        &d_hash_shift,
3010                                        &d_hash_mask,
3011                                        0);
3012
3013        for (loop = 0; loop < (1 << d_hash_shift); loop++)
3014                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3015}
3016
3017static void __init dcache_init(void)
3018{
3019        int loop;
3020
3021        /* 
3022         * A constructor could be added for stable state like the lists,
3023         * but it is probably not worth it because of the cache nature
3024         * of the dcache. 
3025         */
3026        dentry_cache = KMEM_CACHE(dentry,
3027                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3028        
3029        register_shrinker(&dcache_shrinker);
3030
3031        /* Hash may have been set up in dcache_init_early */
3032        if (!hashdist)
3033                return;
3034
3035        dentry_hashtable =
3036                alloc_large_system_hash("Dentry cache",
3037                                        sizeof(struct hlist_bl_head),
3038                                        dhash_entries,
3039                                        13,
3040                                        0,
3041                                        &d_hash_shift,
3042                                        &d_hash_mask,
3043                                        0);
3044
3045        for (loop = 0; loop < (1 << d_hash_shift); loop++)
3046                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3047}
3048
/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

/* d_genocide() is defined earlier in this file; its export lives here. */
EXPORT_SYMBOL(d_genocide);
3054
/* Early (pre-SLAB-NUMA) setup of the dentry and inode hash tables. */
void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}
3060
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	/* names_cache backs the PATH_MAX buffers handed out by __getname(). */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* NOTE(review): the order of these calls looks intentional
	 * (e.g. mnt_init() after inode/dentry caches exist) — confirm
	 * before reordering. */
	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}
3081