   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include "internal.h"
  37
/*
 * Tunable exported to the VM: presumably controls how aggressively the
 * dentry/inode caches are reclaimed relative to the pagecache (the
 * actual use is in the shrinker paths) -- see vfs_cache_pressure docs.
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

/* Big dcache lock: serializes all dcache structure updates in this file. */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
/* Seqlock taken by rename paths; readers elsewhere retry on a rename. */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(dcache_lock);
/* Slab cache from which all struct dentry objects are allocated. */
static struct kmem_cache *dentry_cache __read_mostly;

/* Bytes of name that fit in the dentry itself (tail space after d_iname). */
#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

/* Live totals; folded into dentry_stat lazily by proc_nr_dentry(). */
static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
  72
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
/*
 * Sysctl handler for the dentry statistics: refresh dentry_stat from
 * the percpu counters, then let the generic integer-vector handler do
 * the actual read/write of the table.
 */
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
	dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
  82
/*
 * Final teardown of a dentry, usable as an RCU callback: free the
 * external name (when the name did not fit inline -- per
 * dname_external()) and give the object back to the slab cache.
 */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	/* A non-empty d_alias here would mean we leaked an inode reference. */
	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}
  92
/*
 * no dcache_lock, please.
 *
 * Drop the global dentry count, give the filesystem its ->d_release()
 * callback, then free the dentry -- immediately if it was never hashed
 * (no RCU walker can hold a reference to it), otherwise after a grace
 * period via call_rcu().
 */
static void d_free(struct dentry *dentry)
{
	percpu_counter_dec(&nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_unhashed(&dentry->d_hash))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}
 108
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 *
 * Entered with dentry->d_lock and dcache_lock held; both are dropped
 * in all paths (see the __releases annotations) before the potentially
 * sleeping iput()/->d_iput() is called.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dcache_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		/* Detach from the inode's alias list while still locked. */
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
	}
}
 134
 135/*
 136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
 137 */
 138static void dentry_lru_add(struct dentry *dentry)
 139{
 140        if (list_empty(&dentry->d_lru)) {
 141                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 142                dentry->d_sb->s_nr_dentry_unused++;
 143                percpu_counter_inc(&nr_dentry_unused);
 144        }
 145}
 146
 147static void dentry_lru_del(struct dentry *dentry)
 148{
 149        if (!list_empty(&dentry->d_lru)) {
 150                list_del_init(&dentry->d_lru);
 151                dentry->d_sb->s_nr_dentry_unused--;
 152                percpu_counter_dec(&nr_dentry_unused);
 153        }
 154}
 155
 156static void dentry_lru_move_tail(struct dentry *dentry)
 157{
 158        if (list_empty(&dentry->d_lru)) {
 159                list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 160                dentry->d_sb->s_nr_dentry_unused++;
 161                percpu_counter_inc(&nr_dentry_unused);
 162        } else {
 163                list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 164        }
 165}
 166
/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * Called with dentry->d_lock and dcache_lock held; dentry_iput() drops
 * both, so we return with no locks held.
 */
static struct dentry *d_kill(struct dentry *dentry)
	__releases(dentry->d_lock)
	__releases(dcache_lock)
{
	struct dentry *parent;

	list_del(&dentry->d_u.d_child);
	/*drops the locks, at that point nobody can reach this dentry */
	dentry_iput(dentry);
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	d_free(dentry);
	/* Hand the parent back so the caller can continue the teardown. */
	return parent;
}
 191
 192/* 
 193 * This is dput
 194 *
 195 * This is complicated by the fact that we do not want to put
 196 * dentries that are no longer on any hash chain on the unused
 197 * list: we'd much rather just get rid of them immediately.
 198 *
 199 * However, that implies that we have to traverse the dentry
 200 * tree upwards to the parents which might _also_ now be
 201 * scheduled for deletion (it may have been only waiting for
 202 * its last child to go away).
 203 *
 204 * This tail recursion is done by hand as we don't want to depend
 205 * on the compiler to always get this right (gcc generally doesn't).
 206 * Real recursion would eat up our stack space.
 207 */
 208
 209/*
 210 * dput - release a dentry
 211 * @dentry: dentry to release 
 212 *
 213 * Release a dentry. This will drop the usage count and if appropriate
 214 * call the dentry unlink method as well as removing it from the queues and
 215 * releasing its resources. If the parent dentries were scheduled for release
 216 * they too may now get deleted.
 217 *
 218 * no dcache lock, please.
 219 */
 220
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	/* Dropping the last reference can sleep in the iput path. */
	if (atomic_read(&dentry->d_count) == 1)
		might_sleep();
	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
		return;

	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count)) {
		/* Someone re-grabbed it between the dec and the lock. */
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		return;
	}

	/*
	 * AV: ->d_delete() is _NOT_ allowed to block now.
	 */
	if (dentry->d_op && dentry->d_op->d_delete) {
		if (dentry->d_op->d_delete(dentry))
			goto unhash_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return;

unhash_it:
	__d_drop(dentry);
kill_it:
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* d_kill() drops both locks; a non-NULL parent means its refcount
	 * may now hit zero too -- the hand-rolled tail recursion. */
	dentry = d_kill(dentry);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
 269
 270/**
 271 * d_invalidate - invalidate a dentry
 272 * @dentry: dentry to invalidate
 273 *
 274 * Try to invalidate the dentry if it turns out to be
 275 * possible. If there are other dentries that can be
 276 * reached through this one we can't delete it and we
 277 * return -EBUSY. On success we return 0.
 278 *
 279 * no dcache lock.
 280 */
 281 
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dcache_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dcache_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		/* shrink_dcache_parent() sleeps: must drop the lock first. */
		spin_unlock(&dcache_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dcache_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count) > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			return -EBUSY;
		}
	}

	/* Either unused, or a busy non-directory: unhash it. */
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);
 327
/* This should be called _only_ with dcache_lock held */
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
	atomic_inc(&dentry->d_count);
	/* Now referenced: make sure it's off the unused (LRU) list. */
	dentry_lru_del(dentry);
	return dentry;
}
 335
/* Grab a reference to a dentry while already holding dcache_lock. */
struct dentry * dget_locked(struct dentry *dentry)
{
	return __dget_locked(dentry);
}
EXPORT_SYMBOL(dget_locked);
 341
 342/**
 343 * d_find_alias - grab a hashed alias of inode
 344 * @inode: inode in question
 345 * @want_discon:  flag, used by d_splice_alias, to request
 346 *          that only a DISCONNECTED alias be returned.
 347 *
 348 * If inode has a hashed alias, or is a directory and has any alias,
 349 * acquire the reference to alias and return it. Otherwise return NULL.
 350 * Notice that if inode is a directory there can be only one alias and
 351 * it can be unhashed only if it has no children, or if it is the root
 352 * of a filesystem.
 353 *
 354 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 355 * any other hashed alias over that one unless @want_discon is set,
 356 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 357 */
 358
static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
	struct list_head *head, *next, *tmp;
	struct dentry *alias, *discon_alias=NULL;

	/* Open-coded walk of inode->i_dentry so the next node can be
	 * prefetched while the current alias is examined. */
	head = &inode->i_dentry;
	next = inode->i_dentry.next;
	while (next != head) {
		tmp = next;
		next = tmp->next;
		prefetch(next);
		alias = list_entry(tmp, struct dentry, d_alias);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED))
				/* Remember it, but keep scanning for a
				 * connected alias to prefer. */
				discon_alias = alias;
			else if (!want_discon) {
				__dget_locked(alias);
				return alias;
			}
		}
	}
	/* Fall back to the disconnected root alias, if one was seen. */
	if (discon_alias)
		__dget_locked(discon_alias);
	return discon_alias;
}
 385
 386struct dentry * d_find_alias(struct inode *inode)
 387{
 388        struct dentry *de = NULL;
 389
 390        if (!list_empty(&inode->i_dentry)) {
 391                spin_lock(&dcache_lock);
 392                de = __d_find_alias(inode, 0);
 393                spin_unlock(&dcache_lock);
 394        }
 395        return de;
 396}
 397EXPORT_SYMBOL(d_find_alias);
 398
/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&dcache_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!atomic_read(&dentry->d_count)) {
			/* Pin it, unhash it, then let dput() do the real
			 * teardown without dcache_lock held. */
			__dget_locked(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			dput(dentry);
			/* The list may have changed while unlocked: rescan. */
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
 423
/*
 * Throw away a dentry - free the inode, dput the parent.  This requires that
 * the LRU list has already been removed.
 *
 * Try to prune ancestors as well.  This is necessary to prevent
 * quadratic behavior of shrink_dcache_parent(), but is also expected
 * to be beneficial in reducing dentry cache fragmentation.
 *
 * Entered with dentry->d_lock and dcache_lock held (dropped by d_kill());
 * returns with dcache_lock re-acquired, per the annotations below.
 */
static void prune_one_dentry(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dcache_lock)
	__acquires(dcache_lock)
{
	__d_drop(dentry);
	dentry = d_kill(dentry);

	/*
	 * Prune ancestors.  Locking is simpler than in dput(),
	 * because dcache_lock needs to be taken anyway.
	 */
	spin_lock(&dcache_lock);
	while (dentry) {
		/* Ancestor still referenced: stop, keeping dcache_lock. */
		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
			return;

		if (dentry->d_op && dentry->d_op->d_delete)
			dentry->d_op->d_delete(dentry);
		dentry_lru_del(dentry);
		__d_drop(dentry);
		/* d_kill() drops both locks and yields the next ancestor. */
		dentry = d_kill(dentry);
		spin_lock(&dcache_lock);
	}
}
 457
/*
 * Prune every dentry on @list (a private list of LRU candidates).
 * Called with dcache_lock held; prune_one_dentry() drops and retakes
 * it, so entries may only be removed from @list, never observed
 * mid-teardown by others.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	while (!list_empty(list)) {
		/* Always take the oldest entry, from the tail. */
		dentry = list_entry(list->prev, struct dentry, d_lru);
		dentry_lru_del(dentry);

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup.  Do not free
		 * it - just keep it off the LRU list.
		 */
		spin_lock(&dentry->d_lock);
		if (atomic_read(&dentry->d_count)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		prune_one_dentry(dentry);
		/* dentry->d_lock was dropped in prune_one_dentry() */
		cond_resched_lock(&dcache_lock);
	}
}
 481
/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:         superblock to shrink dentry LRU.
 * @count:      number of entries to prune; updated to the shortfall
 *              (entries we failed to find) on return
 * @flags:      flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);	/* recently-used dentries to put back */
	LIST_HEAD(tmp);		/* victims handed to shrink_dentry_list() */
	int cnt = *count;

	spin_lock(&dcache_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		/* Oldest entries live at the tail of the LRU. */
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it.  Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED) {
			spin_lock(&dentry->d_lock);
			if (dentry->d_flags & DCACHE_REFERENCED) {
				dentry->d_flags &= ~DCACHE_REFERENCED;
				list_move(&dentry->d_lru, &referenced);
				spin_unlock(&dentry->d_lock);
				cond_resched_lock(&dcache_lock);
				continue;
			}
			spin_unlock(&dentry->d_lock);
		}

		list_move_tail(&dentry->d_lru, &tmp);
		if (!--cnt)
			break;
		cond_resched_lock(&dcache_lock);
	}

	*count = cnt;
	shrink_dentry_list(&tmp);

	/* Give the spared referenced dentries another trip around the LRU. */
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lock);

}
 535
/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = percpu_counter_sum_positive(&nr_dentry_unused);
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	spin_lock(&dcache_lock);
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		/* Pin the sb so it survives while we drop sb_lock below. */
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim them same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted.  So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				spin_unlock(&dcache_lock);
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				/* w_count now holds what was NOT pruned. */
				pruned -= w_count;
				spin_lock(&dcache_lock);
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		/* Drop the reference we took on the previous superblock. */
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	spin_unlock(&dcache_lock);
}
 614
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lock);
	/* Loop because shrink_dentry_list() drops dcache_lock internally,
	 * so new entries may appear on the sb LRU meanwhile. */
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		shrink_dentry_list(&tmp);
	}
	spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
 634
/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dcache_lock);
	dentry_lru_del(dentry);
	__d_drop(dentry);
	spin_unlock(&dcache_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dcache_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				dentry_lru_del(loop);
				__d_drop(loop);
				cond_resched_lock(&dcache_lock);
			}
			spin_unlock(&dcache_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/* A non-zero refcount here means someone leaked a
			 * reference on a supposedly-dead superblock. */
			if (atomic_read(&dentry->d_count) != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       atomic_read(&dentry->d_count),
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry))
				parent = NULL;
			else {
				parent = dentry->d_parent;
				/* drop the child's reference on the parent */
				atomic_dec(&parent->d_count);
			}

			list_del(&dentry->d_u.d_child);
			detached++;

			/* Inline inode release: no locks needed here, see
			 * the locking notes on shrink_dcache_for_umount(). */
			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}
 728
/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock, and only need dcache_lock when
 *   removing the dentry from the system lists and hashes because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	/* If we can read-lock s_umount, the caller broke the contract
	 * above (it must hold s_umount for writing). */
	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	/* drop the superblock's own reference on its root */
	atomic_dec(&dentry->d_count);
	shrink_dcache_for_umount_subtree(dentry);

	/* Then destroy any anonymous (disconnected) subtrees, e.g. from
	 * NFS filehandle lookups parked on sb->s_anon. */
	while (!hlist_empty(&sb->s_anon)) {
		dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}
 757
 758/*
 759 * Search for at least 1 mount point in the dentry's subdirs.
 760 * We descend to the next level whenever the d_subdirs
 761 * list is non-empty and continue searching.
 762 */
 763 
 764/**
 765 * have_submounts - check for mounts over a dentry
 766 * @parent: dentry to check.
 767 *
 768 * Return true if the parent or its subdirectories contain
 769 * a mount point
 770 */
 771 
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;

	spin_lock(&dcache_lock);
	if (d_mountpoint(parent))
		goto positive;
repeat:
	/* Start scanning this directory's children. */
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry))
			goto positive;
		/* Descend into a non-empty child directory. */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
	return 0; /* No mount points found in tree */
positive:
	spin_unlock(&dcache_lock);
	return 1;
}
EXPORT_SYMBOL(have_submounts);
 810
/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise  it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;
	int found = 0;

	spin_lock(&dcache_lock);
repeat:
	/* Begin iterating the current directory's children. */
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		/* 
		 * move only zero ref count dentries to the end 
		 * of the unused list for prune_dcache
		 */
		if (!atomic_read(&dentry->d_count)) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			/* In-use dentries are taken off the LRU entirely. */
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched())
			goto out;

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
out:
	spin_unlock(&dcache_lock);
	return found;
}
 879
 880/**
 881 * shrink_dcache_parent - prune dcache
 882 * @parent: parent of entries to prune
 883 *
 884 * Prune the dcache to remove unused children of the parent dentry.
 885 */
 886 
 887void shrink_dcache_parent(struct dentry * parent)
 888{
 889        struct super_block *sb = parent->d_sb;
 890        int found;
 891
 892        while ((found = select_parent(parent)) != 0)
 893                __shrink_dcache_sb(sb, &found, 0);
 894}
 895EXPORT_SYMBOL(shrink_dcache_parent);
 896
 897/*
 898 * Scan `nr' dentries and return the number which remain.
 899 *
 900 * We need to avoid reentering the filesystem if the caller is performing a
 901 * GFP_NOFS allocation attempt.  One example deadlock is:
 902 *
 903 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 904 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 905 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 906 *
 907 * In this case we return -1 to tell the caller that we baled.
 908 */
 909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 910{
 911        int nr_unused;
 912
 913        if (nr) {
 914                if (!(gfp_mask & __GFP_FS))
 915                        return -1;
 916                prune_dcache(nr);
 917        }
 918
 919        nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
 920        return (nr_unused / 100) * sysctl_vfs_cache_pressure;
 921}
 922
/* Shrinker callback descriptor; see shrink_dcache_memory() above. */
static struct shrinker dcache_shrinker = {
        .shrink = shrink_dcache_memory,
        .seeks = DEFAULT_SEEKS,
};
 927
 928/**
 929 * d_alloc      -       allocate a dcache entry
 930 * @parent: parent of entry to allocate
 931 * @name: qstr of the name
 932 *
 933 * Allocates a dentry. It returns %NULL if there is insufficient memory
 934 * available. On a success the dentry is returned. The name passed in is
 935 * copied and the copy passed in may be reused after this call.
 936 */
 937 
 938struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 939{
 940        struct dentry *dentry;
 941        char *dname;
 942
 943        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
 944        if (!dentry)
 945                return NULL;
 946
 947        if (name->len > DNAME_INLINE_LEN-1) {
 948                dname = kmalloc(name->len + 1, GFP_KERNEL);
 949                if (!dname) {
 950                        kmem_cache_free(dentry_cache, dentry); 
 951                        return NULL;
 952                }
 953        } else  {
 954                dname = dentry->d_iname;
 955        }       
 956        dentry->d_name.name = dname;
 957
 958        dentry->d_name.len = name->len;
 959        dentry->d_name.hash = name->hash;
 960        memcpy(dname, name->name, name->len);
 961        dname[name->len] = 0;
 962
 963        atomic_set(&dentry->d_count, 1);
 964        dentry->d_flags = DCACHE_UNHASHED;
 965        spin_lock_init(&dentry->d_lock);
 966        dentry->d_inode = NULL;
 967        dentry->d_parent = NULL;
 968        dentry->d_sb = NULL;
 969        dentry->d_op = NULL;
 970        dentry->d_fsdata = NULL;
 971        dentry->d_mounted = 0;
 972        INIT_HLIST_NODE(&dentry->d_hash);
 973        INIT_LIST_HEAD(&dentry->d_lru);
 974        INIT_LIST_HEAD(&dentry->d_subdirs);
 975        INIT_LIST_HEAD(&dentry->d_alias);
 976
 977        if (parent) {
 978                dentry->d_parent = dget(parent);
 979                dentry->d_sb = parent->d_sb;
 980        } else {
 981                INIT_LIST_HEAD(&dentry->d_u.d_child);
 982        }
 983
 984        spin_lock(&dcache_lock);
 985        if (parent)
 986                list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 987        spin_unlock(&dcache_lock);
 988
 989        percpu_counter_inc(&nr_dentry);
 990
 991        return dentry;
 992}
 993EXPORT_SYMBOL(d_alloc);
 994
 995struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 996{
 997        struct qstr q;
 998
 999        q.name = name;
1000        q.len = strlen(name);
1001        q.hash = full_name_hash(q.name, q.len);
1002        return d_alloc(parent, &q);
1003}
1004EXPORT_SYMBOL(d_alloc_name);
1005
/*
 * Attach @inode to @dentry: link the dentry onto the inode's alias list
 * and set d_inode.  A NULL @inode leaves the dentry negative.
 *
 * The caller must hold dcache_lock.
 */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
        if (inode)
                list_add(&dentry->d_alias, &inode->i_dentry);
        dentry->d_inode = inode;
        fsnotify_d_instantiate(dentry, inode);
}
1014
1015/**
1016 * d_instantiate - fill in inode information for a dentry
1017 * @entry: dentry to complete
1018 * @inode: inode to attach to this dentry
1019 *
1020 * Fill in inode information in the entry.
1021 *
1022 * This turns negative dentries into productive full members
1023 * of society.
1024 *
1025 * NOTE! This assumes that the inode count has been incremented
1026 * (or otherwise set) by the caller to indicate that it is now
1027 * in use by the dcache.
1028 */
1029 
void d_instantiate(struct dentry *entry, struct inode * inode)
{
        /* a dentry may only be instantiated once */
        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
        __d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        /* run the security hook outside of dcache_lock */
        security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
1039
1040/**
1041 * d_instantiate_unique - instantiate a non-aliased dentry
1042 * @entry: dentry to instantiate
1043 * @inode: inode to attach to this dentry
1044 *
1045 * Fill in inode information in the entry. On success, it returns NULL.
1046 * If an unhashed alias of "entry" already exists, then we return the
1047 * aliased dentry instead and drop one reference to inode.
1048 *
1049 * Note that in order to avoid conflicts with rename() etc, the caller
1050 * had better be holding the parent directory semaphore.
1051 *
1052 * This also assumes that the inode count has been incremented
1053 * (or otherwise set) by the caller to indicate that it is now
1054 * in use by the dcache.
1055 */
1056static struct dentry *__d_instantiate_unique(struct dentry *entry,
1057                                             struct inode *inode)
1058{
1059        struct dentry *alias;
1060        int len = entry->d_name.len;
1061        const char *name = entry->d_name.name;
1062        unsigned int hash = entry->d_name.hash;
1063
1064        if (!inode) {
1065                __d_instantiate(entry, NULL);
1066                return NULL;
1067        }
1068
1069        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1070                struct qstr *qstr = &alias->d_name;
1071
1072                if (qstr->hash != hash)
1073                        continue;
1074                if (alias->d_parent != entry->d_parent)
1075                        continue;
1076                if (qstr->len != len)
1077                        continue;
1078                if (memcmp(qstr->name, name, len))
1079                        continue;
1080                dget_locked(alias);
1081                return alias;
1082        }
1083
1084        __d_instantiate(entry, inode);
1085        return NULL;
1086}
1087
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
        struct dentry *result;

        BUG_ON(!list_empty(&entry->d_alias));

        spin_lock(&dcache_lock);
        result = __d_instantiate_unique(entry, inode);
        spin_unlock(&dcache_lock);

        if (!result) {
                /* @entry was instantiated; run the security hook unlocked */
                security_d_instantiate(entry, inode);
                return NULL;
        }

        BUG_ON(!d_unhashed(result));
        /* an existing alias was returned: drop the caller's inode reference */
        iput(inode);
        return result;
}

EXPORT_SYMBOL(d_instantiate_unique);
1109
1110/**
1111 * d_alloc_root - allocate root dentry
1112 * @root_inode: inode to allocate the root for
1113 *
1114 * Allocate a root ("/") dentry for the inode given. The inode is
1115 * instantiated and returned. %NULL is returned if there is insufficient
1116 * memory or the inode passed is %NULL.
1117 */
1118 
1119struct dentry * d_alloc_root(struct inode * root_inode)
1120{
1121        struct dentry *res = NULL;
1122
1123        if (root_inode) {
1124                static const struct qstr name = { .name = "/", .len = 1 };
1125
1126                res = d_alloc(NULL, &name);
1127                if (res) {
1128                        res->d_sb = root_inode->i_sb;
1129                        res->d_parent = res;
1130                        d_instantiate(res, root_inode);
1131                }
1132        }
1133        return res;
1134}
1135EXPORT_SYMBOL(d_alloc_root);
1136
1137static inline struct hlist_head *d_hash(struct dentry *parent,
1138                                        unsigned long hash)
1139{
1140        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1141        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1142        return dentry_hashtable + (hash & D_HASHMASK);
1143}
1144
1145/**
1146 * d_obtain_alias - find or allocate a dentry for a given inode
1147 * @inode: inode to allocate the dentry for
1148 *
1149 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1150 * similar open by handle operations.  The returned dentry may be anonymous,
1151 * or may have a full name (if the inode was already in the cache).
1152 *
1153 * When called on a directory inode, we must ensure that the inode only ever
1154 * has one dentry.  If a dentry is found, that is returned instead of
1155 * allocating a new one.
1156 *
1157 * On successful return, the reference to the inode has been transferred
1158 * to the dentry.  In case of an error the reference on the inode is released.
1159 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
1161 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1162 */
struct dentry *d_obtain_alias(struct inode *inode)
{
        static const struct qstr anonstring = { .name = "" };
        struct dentry *tmp;
        struct dentry *res;

        if (!inode)
                return ERR_PTR(-ESTALE);
        if (IS_ERR(inode))
                return ERR_CAST(inode);

        /* Fast path: hand back an existing alias if there is one. */
        res = d_find_alias(inode);
        if (res)
                goto out_iput;

        /* Allocate the candidate dentry before taking dcache_lock. */
        tmp = d_alloc(NULL, &anonstring);
        if (!tmp) {
                res = ERR_PTR(-ENOMEM);
                goto out_iput;
        }
        tmp->d_parent = tmp; /* make sure dput doesn't croak */

        /* Re-check under the lock: somebody may have created an alias meanwhile. */
        spin_lock(&dcache_lock);
        res = __d_find_alias(inode, 0);
        if (res) {
                spin_unlock(&dcache_lock);
                dput(tmp);
                goto out_iput;
        }

        /* attach a disconnected dentry */
        spin_lock(&tmp->d_lock);
        tmp->d_sb = inode->i_sb;
        tmp->d_inode = inode;
        tmp->d_flags |= DCACHE_DISCONNECTED;
        tmp->d_flags &= ~DCACHE_UNHASHED;
        list_add(&tmp->d_alias, &inode->i_dentry);
        /* anonymous dentries are hashed on the superblock's s_anon list */
        hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
        spin_unlock(&tmp->d_lock);

        spin_unlock(&dcache_lock);
        return tmp;     /* inode reference now owned by tmp */

 out_iput:
        iput(inode);
        return res;
}
EXPORT_SYMBOL(d_obtain_alias);
1211
1212/**
1213 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1214 * @inode:  the inode which may have a disconnected dentry
1215 * @dentry: a negative dentry which we want to point to the inode.
1216 *
1217 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1218 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1219 * and return it, else simply d_add the inode to the dentry and return NULL.
1220 *
1221 * This is needed in the lookup routine of any filesystem that is exportable
1222 * (via knfsd) so that we can build dcache paths to directories effectively.
1223 *
1224 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1225 * is returned.  This matches the expected return value of ->lookup.
1226 *
1227 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
        struct dentry *new = NULL;

        if (inode && S_ISDIR(inode->i_mode)) {
                spin_lock(&dcache_lock);
                new = __d_find_alias(inode, 1);
                if (new) {
                        /* only disconnected (IS_ROOT) aliases are expected here */
                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(new, inode);
                        /* move the existing alias into the name the caller looked up */
                        d_move(new, dentry);
                        iput(inode);
                } else {
                        /* already taking dcache_lock, so d_add() by hand */
                        __d_instantiate(dentry, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(dentry, inode);
                        d_rehash(dentry);
                }
        } else
                d_add(dentry, inode);
        return new;
}
EXPORT_SYMBOL(d_splice_alias);
1253
1254/**
1255 * d_add_ci - lookup or allocate new dentry with case-exact name
1256 * @inode:  the inode case-insensitive lookup has found
1257 * @dentry: the negative dentry that was passed to the parent's lookup func
1258 * @name:   the case-exact name to be associated with the returned dentry
1259 *
1260 * This is to avoid filling the dcache with case-insensitive names to the
1261 * same inode, only the actual correct case is stored in the dcache for
1262 * case-insensitive filesystems.
1263 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
1266 *
1267 * If no entry exists with the exact case name, allocate new dentry with
1268 * the exact case, and return the spliced entry.
1269 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
                        struct qstr *name)
{
        int error;
        struct dentry *found;
        struct dentry *new;

        /*
         * First check if a dentry matching the name already exists,
         * if not go ahead and create it now.
         */
        found = d_hash_and_lookup(dentry->d_parent, name);
        if (!found) {
                new = d_alloc(dentry->d_parent, name);
                if (!new) {
                        error = -ENOMEM;
                        goto err_out;
                }

                found = d_splice_alias(inode, new);
                if (found) {
                        /* splice produced a pre-existing alias; drop ours */
                        dput(new);
                        return found;
                }
                return new;
        }

        /*
         * If a matching dentry exists, and it's not negative use it.
         *
         * Decrement the reference count to balance the iget() done
         * earlier on.
         */
        if (found->d_inode) {
                if (unlikely(found->d_inode != inode)) {
                        /* This can't happen because bad inodes are unhashed. */
                        BUG_ON(!is_bad_inode(inode));
                        BUG_ON(!is_bad_inode(found->d_inode));
                }
                iput(inode);
                return found;
        }

        /*
         * Negative dentry: instantiate it unless the inode is a directory and
         * already has a dentry.
         */
        spin_lock(&dcache_lock);
        if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
                __d_instantiate(found, inode);
                spin_unlock(&dcache_lock);
                security_d_instantiate(found, inode);
                return found;
        }

        /*
         * In case a directory already has a (disconnected) entry grab a
         * reference to it, move it in place and use it.
         */
        new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
        dget_locked(new);
        spin_unlock(&dcache_lock);
        security_d_instantiate(found, inode);
        d_move(new, found);
        /* the inode reference and the unused negative dentry are both dropped */
        iput(inode);
        dput(found);
        return new;

err_out:
        iput(inode);
        return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);
1343
1344/**
1345 * d_lookup - search for a dentry
1346 * @parent: parent dentry
1347 * @name: qstr of name we wish to find
1348 * Returns: dentry, or NULL
1349 *
1350 * d_lookup searches the children of the parent dentry for the name in
1351 * question. If the dentry is found its reference count is incremented and the
1352 * dentry is returned. The caller must use dput to free the entry when it has
1353 * finished using it. %NULL is returned if the dentry does not exist.
1354 */
1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1356{
1357        struct dentry * dentry = NULL;
1358        unsigned long seq;
1359
1360        do {
1361                seq = read_seqbegin(&rename_lock);
1362                dentry = __d_lookup(parent, name);
1363                if (dentry)
1364                        break;
1365        } while (read_seqretry(&rename_lock, seq));
1366        return dentry;
1367}
1368EXPORT_SYMBOL(d_lookup);
1369
1370/*
1371 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry
1373 * @name: qstr of name we wish to find
1374 * Returns: dentry, or NULL
1375 *
1376 * __d_lookup is like d_lookup, however it may (rarely) return a
1377 * false-negative result due to unrelated rename activity.
1378 *
1379 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1380 * however it must be used carefully, eg. with a following d_lookup in
1381 * the case of failure.
1382 *
1383 * __d_lookup callers must be commented.
1384 */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
        struct hlist_head *head = d_hash(parent,hash);
        struct dentry *found = NULL;
        struct hlist_node *node;
        struct dentry *dentry;

        /*
         * The hash list is protected using RCU.
         *
         * Take d_lock when comparing a candidate dentry, to avoid races
         * with d_move().
         *
         * It is possible that concurrent renames can mess up our list
         * walk here and result in missing our dentry, resulting in the
         * false-negative result. d_lookup() protects against concurrent
         * renames using rename_lock seqlock.
         *
         * See Documentation/vfs/dcache-locking.txt for more details.
         */
        rcu_read_lock();

        hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
                struct qstr *qstr;

                /* cheap unlocked filters before taking d_lock */
                if (dentry->d_name.hash != hash)
                        continue;
                if (dentry->d_parent != parent)
                        continue;

                spin_lock(&dentry->d_lock);

                /*
                 * Recheck the dentry after taking the lock - d_move may have
                 * changed things. Don't bother checking the hash because
                 * we're about to compare the whole name anyway.
                 */
                if (dentry->d_parent != parent)
                        goto next;

                /* non-existing due to RCU? */
                if (d_unhashed(dentry))
                        goto next;

                /*
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
                qstr = &dentry->d_name;
                if (parent->d_op && parent->d_op->d_compare) {
                        if (parent->d_op->d_compare(parent, qstr, name))
                                goto next;
                } else {
                        if (qstr->len != len)
                                goto next;
                        if (memcmp(qstr->name, str, len))
                                goto next;
                }

                /* take the reference while d_lock still pins the dentry */
                atomic_inc(&dentry->d_count);
                found = dentry;
                spin_unlock(&dentry->d_lock);
                break;
next:
                spin_unlock(&dentry->d_lock);
        }
        rcu_read_unlock();

        return found;
}
1458
1459/**
1460 * d_hash_and_lookup - hash the qstr then search for a dentry
1461 * @dir: Directory to search in
1462 * @name: qstr of name we wish to find
1463 *
1464 * On hash failure or on lookup failure NULL is returned.
1465 */
1466struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1467{
1468        struct dentry *dentry = NULL;
1469
1470        /*
1471         * Check for a fs-specific hash function. Note that we must
1472         * calculate the standard hash first, as the d_op->d_hash()
1473         * routine may choose to leave the hash value unchanged.
1474         */
1475        name->hash = full_name_hash(name->name, name->len);
1476        if (dir->d_op && dir->d_op->d_hash) {
1477                if (dir->d_op->d_hash(dir, name) < 0)
1478                        goto out;
1479        }
1480        dentry = d_lookup(dir, name);
1481out:
1482        return dentry;
1483}
1484
1485/**
1486 * d_validate - verify dentry provided from insecure source
1487 * @dentry: The dentry alleged to be valid child of @dparent
1488 * @dparent: The parent dentry (known to be valid)
1489 *
1490 * An insecure source has sent us a dentry, here we verify it and dget() it.
1491 * This is used by ncpfs in its readdir implementation.
1492 * Zero is returned in the dentry is invalid.
1493 */
1494int d_validate(struct dentry *dentry, struct dentry *parent)
1495{
1496        struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
1497        struct hlist_node *node;
1498        struct dentry *d;
1499
1500        /* Check whether the ptr might be valid at all.. */
1501        if (!kmem_ptr_validate(dentry_cache, dentry))
1502                return 0;
1503        if (dentry->d_parent != parent)
1504                return 0;
1505
1506        rcu_read_lock();
1507        hlist_for_each_entry_rcu(d, node, head, d_hash) {
1508                if (d == dentry) {
1509                        dget(dentry);
1510                        return 1;
1511                }
1512        }
1513        rcu_read_unlock();
1514        return 0;
1515}
1516EXPORT_SYMBOL(d_validate);
1517
1518/*
1519 * When a file is deleted, we have two options:
1520 * - turn this dentry into a negative dentry
1521 * - unhash this dentry and free it.
1522 *
1523 * Usually, we want to just turn this into
1524 * a negative dentry, but if anybody else is
1525 * currently using the dentry or the inode
1526 * we can't do that and we fall back on removing
1527 * it from the hash queues and waiting for
1528 * it to be deleted later when it has no users
1529 */
1530 
1531/**
1532 * d_delete - delete a dentry
1533 * @dentry: The dentry to delete
1534 *
1535 * Turn the dentry into a negative dentry if possible, otherwise
1536 * remove it from the hash queues so it can be deleted later
1537 */
1538 
void d_delete(struct dentry * dentry)
{
        int isdir = 0;
        /*
         * Are we the only user?
         */
        spin_lock(&dcache_lock);
        spin_lock(&dentry->d_lock);
        isdir = S_ISDIR(dentry->d_inode->i_mode);
        if (atomic_read(&dentry->d_count) == 1) {
                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
                /*
                 * Sole user: turn it into a negative dentry right here.
                 * Note no explicit unlocks on this path - dentry_iput()
                 * is expected to release d_lock and dcache_lock.
                 */
                dentry_iput(dentry);
                fsnotify_nameremove(dentry, isdir);
                return;
        }

        /* Others still hold it: unhash so it is freed when the last user goes. */
        if (!d_unhashed(dentry))
                __d_drop(dentry);

        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);

        fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
1564
/* Put @entry on the given hash chain; callers hold dcache_lock and d_lock. */
static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{

        entry->d_flags &= ~DCACHE_UNHASHED;
        hlist_add_head_rcu(&entry->d_hash, list);
}
1571
/* Hash @entry on the chain derived from its parent and name hash. */
static void _d_rehash(struct dentry * entry)
{
        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}
1576
1577/**
1578 * d_rehash     - add an entry back to the hash
1579 * @entry: dentry to add to the hash
1580 *
1581 * Adds a dentry to the hash according to its name.
1582 */
1583 
void d_rehash(struct dentry * entry)
{
        /* take both locks that protect the hash chain and d_flags */
        spin_lock(&dcache_lock);
        spin_lock(&entry->d_lock);
        _d_rehash(entry);
        spin_unlock(&entry->d_lock);
        spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(d_rehash);
1593
1594/*
1595 * When switching names, the actual string doesn't strictly have to
1596 * be preserved in the target - because we're dropping the target
1597 * anyway. As such, we can just do a simple memcpy() to copy over
1598 * the new name before we switch.
1599 *
1600 * Note that we have to be a lot more careful about getting the hash
1601 * switched - we have to switch the hash value properly even if it
1602 * then no longer matches the actual (corrupted) string of the target.
1603 * The hash value has to match the hash queue that the dentry is on..
1604 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
        if (dname_external(target)) {
                if (dname_external(dentry)) {
                        /*
                         * Both external: swap the pointers
                         */
                        swap(target->d_name.name, dentry->d_name.name);
                } else {
                        /*
                         * dentry:internal, target:external.  Steal target's
                         * storage and make target internal.
                         */
                        memcpy(target->d_iname, dentry->d_name.name,
                                        dentry->d_name.len + 1);
                        dentry->d_name.name = target->d_name.name;
                        target->d_name.name = target->d_iname;
                }
        } else {
                if (dname_external(dentry)) {
                        /*
                         * dentry:external, target:internal.  Give dentry's
                         * storage to target and make dentry internal
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        target->d_name.name = dentry->d_name.name;
                        dentry->d_name.name = dentry->d_iname;
                } else {
                        /*
                         * Both are internal.  Just copy target to dentry
                         * (target's copy may be garbled, but it is being
                         * dropped anyway; see comment above).  No length
                         * swap needed here, so return early.
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        dentry->d_name.len = target->d_name.len;
                        return;
                }
        }
        /* the three swap cases above still need the lengths exchanged */
        swap(dentry->d_name.len, target->d_name.len);
}
1645
1646/*
1647 * We cannibalize "target" when moving dentry on top of it,
1648 * because it's going to be thrown away anyway. We could be more
1649 * polite about it, though.
1650 *
1651 * This forceful removal will result in ugly /proc output if
1652 * somebody holds a file open that got deleted due to a rename.
1653 * We could be nicer about the deleted file, and let it show
1654 * up under the name it had before it was deleted rather than
1655 * under the original name of the file that was moved on top of it.
1656 */
1657 
1658/*
1659 * d_move_locked - move a dentry
1660 * @dentry: entry to move
1661 * @target: new dentry
1662 *
1663 * Update the dcache to reflect the move of a file name. Negative
1664 * dcache entries should not be moved in this way.
1665 */
static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
        struct hlist_head *list;

        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");

        write_seqlock(&rename_lock);
        /*
         * XXXX: do we really need to take target->d_lock?
         *
         * Take the two d_locks in address order so that two concurrent
         * renames cannot deadlock against each other.
         */
        if (target < dentry) {
                spin_lock(&target->d_lock);
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
        } else {
                spin_lock(&dentry->d_lock);
                spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
        }

        /* Move the dentry to the target hash queue, if on different bucket */
        if (d_unhashed(dentry))
                goto already_unhashed;

        hlist_del_rcu(&dentry->d_hash);

already_unhashed:
        list = d_hash(target->d_parent, target->d_name.hash);
        __d_rehash(dentry, list);

        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);

        list_del(&dentry->d_u.d_child);
        list_del(&target->d_u.d_child);

        /* Switch the names.. */
        switch_names(dentry, target);
        swap(dentry->d_name.hash, target->d_name.hash);

        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
                /* dentry becomes the child; target becomes a root of its own */
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
                swap(dentry->d_parent, target->d_parent);

                /* And add them back to the (new) parent lists */
                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
        }

        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
        spin_unlock(&target->d_lock);
        fsnotify_d_move(dentry);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
}
1723
1724/**
1725 * d_move - move a dentry
1726 * @dentry: entry to move
1727 * @target: new dentry
1728 *
1729 * Update the dcache to reflect the move of a file name. Negative
1730 * dcache entries should not be moved in this way.
1731 */
1732
1733void d_move(struct dentry * dentry, struct dentry * target)
1734{
1735        spin_lock(&dcache_lock);
1736        d_move_locked(dentry, target);
1737        spin_unlock(&dcache_lock);
1738}
1739EXPORT_SYMBOL(d_move);
1740
1741/**
1742 * d_ancestor - search for an ancestor
1743 * @p1: ancestor dentry
1744 * @p2: child dentry
1745 *
1746 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
1747 * an ancestor of p2, else NULL.
1748 */
1749struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1750{
1751        struct dentry *p;
1752
1753        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
1754                if (p->d_parent == p1)
1755                        return p;
1756        }
1757        return NULL;
1758}
1759
1760/*
1761 * This helper attempts to cope with remotely renamed directories
1762 *
1763 * It assumes that the caller is already holding
1764 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
1765 *
1766 * Note: If ever the locking in lock_rename() changes, then please
1767 * remember to update this too...
1768 */
1769static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1770        __releases(dcache_lock)
1771{
1772        struct mutex *m1 = NULL, *m2 = NULL;
1773        struct dentry *ret;
1774
1775        /* If alias and dentry share a parent, then no extra locks required */
1776        if (alias->d_parent == dentry->d_parent)
1777                goto out_unalias;
1778
1779        /* Check for loops */
1780        ret = ERR_PTR(-ELOOP);
1781        if (d_ancestor(alias, dentry))
1782                goto out_err;
1783
1784        /* See lock_rename() */
1785        ret = ERR_PTR(-EBUSY);
1786        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
1787                goto out_err;
1788        m1 = &dentry->d_sb->s_vfs_rename_mutex;
1789        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
1790                goto out_err;
1791        m2 = &alias->d_parent->d_inode->i_mutex;
1792out_unalias:
1793        d_move_locked(alias, dentry);
1794        ret = alias;
1795out_err:
1796        spin_unlock(&dcache_lock);
1797        if (m2)
1798                mutex_unlock(m2);
1799        if (m1)
1800                mutex_unlock(m1);
1801        return ret;
1802}
1803
1804/*
1805 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1806 * named dentry in place of the dentry to be replaced.
1807 */
1808static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1809{
1810        struct dentry *dparent, *aparent;
1811
1812        switch_names(dentry, anon);
1813        swap(dentry->d_name.hash, anon->d_name.hash);
1814
1815        dparent = dentry->d_parent;
1816        aparent = anon->d_parent;
1817
1818        dentry->d_parent = (aparent == anon) ? dentry : aparent;
1819        list_del(&dentry->d_u.d_child);
1820        if (!IS_ROOT(dentry))
1821                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1822        else
1823                INIT_LIST_HEAD(&dentry->d_u.d_child);
1824
1825        anon->d_parent = (dparent == dentry) ? anon : dparent;
1826        list_del(&anon->d_u.d_child);
1827        if (!IS_ROOT(anon))
1828                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
1829        else
1830                INIT_LIST_HEAD(&anon->d_u.d_child);
1831
1832        anon->d_flags &= ~DCACHE_DISCONNECTED;
1833}
1834
1835/**
1836 * d_materialise_unique - introduce an inode into the tree
1837 * @dentry: candidate dentry
1838 * @inode: inode to bind to the dentry, to which aliases may be attached
1839 *
1840 * Introduces an dentry into the tree, substituting an extant disconnected
1841 * root directory alias in its place if there is one
1842 */
1843struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1844{
1845        struct dentry *actual;
1846
1847        BUG_ON(!d_unhashed(dentry));
1848
1849        spin_lock(&dcache_lock);
1850
1851        if (!inode) {
1852                actual = dentry;
1853                __d_instantiate(dentry, NULL);
1854                goto found_lock;
1855        }
1856
1857        if (S_ISDIR(inode->i_mode)) {
1858                struct dentry *alias;
1859
1860                /* Does an aliased dentry already exist? */
1861                alias = __d_find_alias(inode, 0);
1862                if (alias) {
1863                        actual = alias;
1864                        /* Is this an anonymous mountpoint that we could splice
1865                         * into our tree? */
1866                        if (IS_ROOT(alias)) {
1867                                spin_lock(&alias->d_lock);
1868                                __d_materialise_dentry(dentry, alias);
1869                                __d_drop(alias);
1870                                goto found;
1871                        }
1872                        /* Nope, but we must(!) avoid directory aliasing */
1873                        actual = __d_unalias(dentry, alias);
1874                        if (IS_ERR(actual))
1875                                dput(alias);
1876                        goto out_nolock;
1877                }
1878        }
1879
1880        /* Add a unique reference */
1881        actual = __d_instantiate_unique(dentry, inode);
1882        if (!actual)
1883                actual = dentry;
1884        else if (unlikely(!d_unhashed(actual)))
1885                goto shouldnt_be_hashed;
1886
1887found_lock:
1888        spin_lock(&actual->d_lock);
1889found:
1890        _d_rehash(actual);
1891        spin_unlock(&actual->d_lock);
1892        spin_unlock(&dcache_lock);
1893out_nolock:
1894        if (actual == dentry) {
1895                security_d_instantiate(dentry, inode);
1896                return NULL;
1897        }
1898
1899        iput(inode);
1900        return actual;
1901
1902shouldnt_be_hashed:
1903        spin_unlock(&dcache_lock);
1904        BUG();
1905}
1906EXPORT_SYMBOL_GPL(d_materialise_unique);
1907
/*
 * Copy @namelen bytes of @str immediately in front of the current buffer
 * position, building the string right-to-left.  On success *buffer moves
 * back by @namelen.  *buflen is reduced in all cases — including the
 * -ENAMETOOLONG failure — which callers rely on.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	int remaining = *buflen - namelen;

	*buflen = remaining;
	if (remaining < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}
1917
/* Prepend a qstr component; uses the cached length, avoiding strlen(). */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}
1922
1923/**
1924 * Prepend path string to a buffer
1925 *
1926 * @path: the dentry/vfsmount to report
1927 * @root: root vfsmnt/dentry (may be modified by this function)
1928 * @buffer: pointer to the end of the buffer
1929 * @buflen: pointer to buffer length
1930 *
1931 * Caller holds the dcache_lock.
1932 *
1933 * If path is not reachable from the supplied root, then the value of
1934 * root is changed (without modifying refcounts).
1935 */
1936static int prepend_path(const struct path *path, struct path *root,
1937                        char **buffer, int *buflen)
1938{
1939        struct dentry *dentry = path->dentry;
1940        struct vfsmount *vfsmnt = path->mnt;
1941        bool slash = false;
1942        int error = 0;
1943
1944        br_read_lock(vfsmount_lock);
1945        while (dentry != root->dentry || vfsmnt != root->mnt) {
1946                struct dentry * parent;
1947
1948                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1949                        /* Global root? */
1950                        if (vfsmnt->mnt_parent == vfsmnt) {
1951                                goto global_root;
1952                        }
1953                        dentry = vfsmnt->mnt_mountpoint;
1954                        vfsmnt = vfsmnt->mnt_parent;
1955                        continue;
1956                }
1957                parent = dentry->d_parent;
1958                prefetch(parent);
1959                error = prepend_name(buffer, buflen, &dentry->d_name);
1960                if (!error)
1961                        error = prepend(buffer, buflen, "/", 1);
1962                if (error)
1963                        break;
1964
1965                slash = true;
1966                dentry = parent;
1967        }
1968
1969out:
1970        if (!error && !slash)
1971                error = prepend(buffer, buflen, "/", 1);
1972
1973        br_read_unlock(vfsmount_lock);
1974        return error;
1975
1976global_root:
1977        /*
1978         * Filesystems needing to implement special "root names"
1979         * should do so with ->d_dname()
1980         */
1981        if (IS_ROOT(dentry) &&
1982            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
1983                WARN(1, "Root dentry has weird name <%.*s>\n",
1984                     (int) dentry->d_name.len, dentry->d_name.name);
1985        }
1986        root->mnt = vfsmnt;
1987        root->dentry = dentry;
1988        goto out;
1989}
1990
1991/**
1992 * __d_path - return the path of a dentry
1993 * @path: the dentry/vfsmount to report
1994 * @root: root vfsmnt/dentry (may be modified by this function)
1995 * @buf: buffer to return value in
1996 * @buflen: buffer length
1997 *
1998 * Convert a dentry into an ASCII path name.
1999 *
2000 * Returns a pointer into the buffer or an error code if the
2001 * path was too long.
2002 *
2003 * "buflen" should be positive.
2004 *
2005 * If path is not reachable from the supplied root, then the value of
2006 * root is changed (without modifying refcounts).
2007 */
2008char *__d_path(const struct path *path, struct path *root,
2009               char *buf, int buflen)
2010{
2011        char *res = buf + buflen;
2012        int error;
2013
2014        prepend(&res, &buflen, "\0", 1);
2015        spin_lock(&dcache_lock);
2016        error = prepend_path(path, root, &res, &buflen);
2017        spin_unlock(&dcache_lock);
2018
2019        if (error)
2020                return ERR_PTR(error);
2021        return res;
2022}
2023
2024/*
2025 * same as __d_path but appends "(deleted)" for unlinked files.
2026 */
2027static int path_with_deleted(const struct path *path, struct path *root,
2028                                 char **buf, int *buflen)
2029{
2030        prepend(buf, buflen, "\0", 1);
2031        if (d_unlinked(path->dentry)) {
2032                int error = prepend(buf, buflen, " (deleted)", 10);
2033                if (error)
2034                        return error;
2035        }
2036
2037        return prepend_path(path, root, buf, buflen);
2038}
2039
/* Prefix "(unreachable)" (13 bytes, no NUL) for paths outside the root. */
static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}
2044
2045/**
2046 * d_path - return the path of a dentry
2047 * @path: path to report
2048 * @buf: buffer to return value in
2049 * @buflen: buffer length
2050 *
2051 * Convert a dentry into an ASCII path name. If the entry has been deleted
2052 * the string " (deleted)" is appended. Note that this is ambiguous.
2053 *
2054 * Returns a pointer into the buffer or an error code if the path was
2055 * too long. Note: Callers should use the returned pointer, not the passed
2056 * in buffer, to use the name! The implementation often starts at an offset
2057 * into the buffer, and may leave 0 bytes at the start.
2058 *
2059 * "buflen" should be positive.
2060 */
2061char *d_path(const struct path *path, char *buf, int buflen)
2062{
2063        char *res = buf + buflen;
2064        struct path root;
2065        struct path tmp;
2066        int error;
2067
2068        /*
2069         * We have various synthetic filesystems that never get mounted.  On
2070         * these filesystems dentries are never used for lookup purposes, and
2071         * thus don't need to be hashed.  They also don't need a name until a
2072         * user wants to identify the object in /proc/pid/fd/.  The little hack
2073         * below allows us to generate a name for these objects on demand:
2074         */
2075        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2076                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2077
2078        get_fs_root(current->fs, &root);
2079        spin_lock(&dcache_lock);
2080        tmp = root;
2081        error = path_with_deleted(path, &tmp, &res, &buflen);
2082        if (error)
2083                res = ERR_PTR(error);
2084        spin_unlock(&dcache_lock);
2085        path_put(&root);
2086        return res;
2087}
2088EXPORT_SYMBOL(d_path);
2089
2090/**
2091 * d_path_with_unreachable - return the path of a dentry
2092 * @path: path to report
2093 * @buf: buffer to return value in
2094 * @buflen: buffer length
2095 *
2096 * The difference from d_path() is that this prepends "(unreachable)"
2097 * to paths which are unreachable from the current process' root.
2098 */
2099char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2100{
2101        char *res = buf + buflen;
2102        struct path root;
2103        struct path tmp;
2104        int error;
2105
2106        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2107                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108
2109        get_fs_root(current->fs, &root);
2110        spin_lock(&dcache_lock);
2111        tmp = root;
2112        error = path_with_deleted(path, &tmp, &res, &buflen);
2113        if (!error && !path_equal(&tmp, &root))
2114                error = prepend_unreachable(&res, &buflen);
2115        spin_unlock(&dcache_lock);
2116        path_put(&root);
2117        if (error)
2118                res =  ERR_PTR(error);
2119
2120        return res;
2121}
2122
2123/*
2124 * Helper function for dentry_operations.d_dname() members
2125 */
2126char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2127                        const char *fmt, ...)
2128{
2129        va_list args;
2130        char temp[64];
2131        int sz;
2132
2133        va_start(args, fmt);
2134        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2135        va_end(args);
2136
2137        if (sz > sizeof(temp) || sz > buflen)
2138                return ERR_PTR(-ENAMETOOLONG);
2139
2140        buffer += buflen - sz;
2141        return memcpy(buffer, temp, sz);
2142}
2143
2144/*
2145 * Write full pathname from the root of the filesystem into the buffer.
2146 */
2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2148{
2149        char *end = buf + buflen;
2150        char *retval;
2151
2152        prepend(&end, &buflen, "\0", 1);
2153        if (buflen < 1)
2154                goto Elong;
2155        /* Get '/' right */
2156        retval = end-1;
2157        *retval = '/';
2158
2159        while (!IS_ROOT(dentry)) {
2160                struct dentry *parent = dentry->d_parent;
2161
2162                prefetch(parent);
2163                if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
2164                    (prepend(&end, &buflen, "/", 1) != 0))
2165                        goto Elong;
2166
2167                retval = end;
2168                dentry = parent;
2169        }
2170        return retval;
2171Elong:
2172        return ERR_PTR(-ENAMETOOLONG);
2173}
2174EXPORT_SYMBOL(__dentry_path);
2175
/*
 * Like __dentry_path(), but appends "//deleted" for unlinked dentries.
 * Takes dcache_lock around the tree walk.
 */
char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *p = NULL;
	char *retval;

	spin_lock(&dcache_lock);
	if (d_unlinked(dentry)) {
		p = buf + buflen;
		/*
		 * Reserve "//deleted" plus its NUL (10 bytes) at the end.
		 * __dentry_path() below writes its own NUL exactly over the
		 * first '/' at p; buflen++ hands that shared byte back so
		 * the net reservation is 9 characters.
		 */
		if (prepend(&p, &buflen, "//deleted", 10) != 0)
			goto Elong;
		buflen++;
	}
	retval = __dentry_path(dentry, buf, buflen);
	spin_unlock(&dcache_lock);
	if (!IS_ERR(retval) && p)
		*p = '/';	/* restore '/' overridden with '\0' */
	return retval;
Elong:
	spin_unlock(&dcache_lock);
	return ERR_PTR(-ENAMETOOLONG);
}
2197
2198/*
2199 * NOTE! The user-level library version returns a
2200 * character pointer. The kernel system call just
2201 * returns the length of the buffer filled (which
2202 * includes the ending '\0' character), or a negative
2203 * error value. So libc would do something like
2204 *
2205 *      char *getcwd(char * buf, size_t size)
2206 *      {
2207 *              int retval;
2208 *
2209 *              retval = sys_getcwd(buf, size);
2210 *              if (retval >= 0)
2211 *                      return buf;
2212 *              errno = -retval;
2213 *              return NULL;
2214 *      }
2215 */
2216SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2217{
2218        int error;
2219        struct path pwd, root;
2220        char *page = (char *) __get_free_page(GFP_USER);
2221
2222        if (!page)
2223                return -ENOMEM;
2224
2225        get_fs_root_and_pwd(current->fs, &root, &pwd);
2226
2227        error = -ENOENT;
2228        spin_lock(&dcache_lock);
2229        if (!d_unlinked(pwd.dentry)) {
2230                unsigned long len;
2231                struct path tmp = root;
2232                char *cwd = page + PAGE_SIZE;
2233                int buflen = PAGE_SIZE;
2234
2235                prepend(&cwd, &buflen, "\0", 1);
2236                error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2237                spin_unlock(&dcache_lock);
2238
2239                if (error)
2240                        goto out;
2241
2242                /* Unreachable from current root */
2243                if (!path_equal(&tmp, &root)) {
2244                        error = prepend_unreachable(&cwd, &buflen);
2245                        if (error)
2246                                goto out;
2247                }
2248
2249                error = -ERANGE;
2250                len = PAGE_SIZE + page - cwd;
2251                if (len <= size) {
2252                        error = len;
2253                        if (copy_to_user(buf, cwd, len))
2254                                error = -EFAULT;
2255                }
2256        } else
2257                spin_unlock(&dcache_lock);
2258
2259out:
2260        path_put(&pwd);
2261        path_put(&root);
2262        free_page((unsigned long) page);
2263        return error;
2264}
2265
2266/*
2267 * Test whether new_dentry is a subdirectory of old_dentry.
2268 *
2269 * Trivially implemented using the dcache structure
2270 */
2271
2272/**
2273 * is_subdir - is new dentry a subdirectory of old_dentry
2274 * @new_dentry: new dentry
2275 * @old_dentry: old dentry
2276 *
2277 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2278 * Returns 0 otherwise.
2279 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2280 */
2281  
2282int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2283{
2284        int result;
2285        unsigned long seq;
2286
2287        if (new_dentry == old_dentry)
2288                return 1;
2289
2290        /*
2291         * Need rcu_readlock to protect against the d_parent trashing
2292         * due to d_move
2293         */
2294        rcu_read_lock();
2295        do {
2296                /* for restarting inner loop in case of seq retry */
2297                seq = read_seqbegin(&rename_lock);
2298                if (d_ancestor(old_dentry, new_dentry))
2299                        result = 1;
2300                else
2301                        result = 0;
2302        } while (read_seqretry(&rename_lock, seq));
2303        rcu_read_unlock();
2304
2305        return result;
2306}
2307
/*
 * path_is_under - is path1 beneath path2?
 *
 * Climbs path1's chain of parent mounts under vfsmount_lock until it
 * reaches path2's mount (translating to the mountpoint dentry), then
 * defers to is_subdir() for the dentry ancestry check.  Returns 0 if
 * the global root is reached without finding path2's mount.
 */
int path_is_under(struct path *path1, struct path *path2)
{
	struct vfsmount *mnt = path1->mnt;
	struct dentry *dentry = path1->dentry;
	int res;

	br_read_lock(vfsmount_lock);
	if (mnt != path2->mnt) {
		for (;;) {
			/* mnt_parent == mnt means global root: not under. */
			if (mnt->mnt_parent == mnt) {
				br_read_unlock(vfsmount_lock);
				return 0;
			}
			if (mnt->mnt_parent == path2->mnt)
				break;
			mnt = mnt->mnt_parent;
		}
		/* Continue the comparison from where this mount sits. */
		dentry = mnt->mnt_mountpoint;
	}
	res = is_subdir(dentry, path2->dentry);
	br_read_unlock(vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);
2332
/*
 * d_genocide - drop a reference on every live dentry in a subtree
 * @root: root of the subtree
 *
 * Iterative depth-first walk over @root's descendants under dcache_lock,
 * decrementing d_count once for each hashed, positive dentry (and once
 * for each interior parent on the way back up).  Presumably used to
 * release a tree-wide set of pinned references — confirm with callers.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Skip unhashed and negative dentries. */
		if (d_unhashed(dentry)||!dentry->d_inode)
			continue;
		/* Descend into non-empty directories first. */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	/* Ascend: drop the parent's ref and resume its sibling scan. */
	if (this_parent != root) {
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}
2362
2363/**
2364 * find_inode_number - check for dentry with name
2365 * @dir: directory to check
2366 * @name: Name to find.
2367 *
2368 * Check whether a dentry already exists for the given name,
2369 * and return the inode number if it has an inode. Otherwise
2370 * 0 is returned.
2371 *
2372 * This routine is used to post-process directory listings for
2373 * filesystems using synthetic inode numbers, and is necessary
2374 * to keep getcwd() working.
2375 */
2376 
2377ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2378{
2379        struct dentry * dentry;
2380        ino_t ino = 0;
2381
2382        dentry = d_hash_and_lookup(dir, name);
2383        if (dentry) {
2384                if (dentry->d_inode)
2385                        ino = dentry->d_inode->i_ino;
2386                dput(dentry);
2387        }
2388        return ino;
2389}
2390EXPORT_SYMBOL(find_inode_number);
2391
/* Requested dentry hash table size from the "dhash_entries=" boot option. */
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	/* No value supplied: report the option as unhandled. */
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;	/* handled */
}
__setup("dhash_entries=", set_dhash_entries);
2401
/*
 * Allocate and initialize the dentry hash table at early boot,
 * unless hash distribution across NUMA nodes was requested — in that
 * case dcache_init() does it later, once vmalloc space exists.
 */
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,	/* scale — see alloc_large_system_hash() */
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/* Start with every hash bucket empty. */
	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
2425
/*
 * Main dcache initialization: statistics counters, the dentry slab
 * cache, the shrinker, and — in the hashdist case only — the hash
 * table that dcache_init_early() deferred.
 */
static void __init dcache_init(void)
{
	int loop;

	percpu_counter_init(&nr_dentry, 0);
	percpu_counter_init(&nr_dentry_unused, 0);

	/* 
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache. 
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
	
	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	/* Same parameters as dcache_init_early(), minus HASH_EARLY. */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
2460
2461/* SLAB cache for __getname() consumers */
2462struct kmem_cache *names_cachep __read_mostly;
2463EXPORT_SYMBOL(names_cachep);
2464
2465EXPORT_SYMBOL(d_genocide);
2466
/* Early boot-time hash table setup for the dentry and inode caches. */
void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}
2472
/*
 * Set up the core VFS caches at boot.  @mempages is the number of
 * available memory pages; hash sizing is based on it after holding
 * back a reserve for the kernel itself.
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	/* Slab backing __getname()/__putname() pathname buffers. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}
2493