linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41#include "mount.h"
  42
  43/*
  44 * Usage:
  45 * dcache->d_inode->i_lock protects:
  46 *   - i_dentry, d_alias, d_inode of aliases
  47 * dcache_hash_bucket lock protects:
  48 *   - the dcache hash table
  49 * s_anon bl list spinlock protects:
  50 *   - the s_anon list (see __d_drop)
  51 * dcache_lru_lock protects:
  52 *   - the dcache lru lists and counters
  53 * d_lock protects:
  54 *   - d_flags
  55 *   - d_name
  56 *   - d_lru
  57 *   - d_count
  58 *   - d_unhashed()
  59 *   - d_parent and d_subdirs
  60 *   - childrens' d_child and d_parent
  61 *   - d_alias, d_inode
  62 *
  63 * Ordering:
  64 * dentry->d_inode->i_lock
  65 *   dentry->d_lock
  66 *     dcache_lru_lock
  67 *     dcache_hash_bucket lock
  68 *     s_anon lock
  69 *
  70 * If there is an ancestor relationship:
  71 * dentry->d_parent->...->d_parent->d_lock
  72 *   ...
  73 *     dentry->d_parent->d_lock
  74 *       dentry->d_lock
  75 *
  76 * If no ancestor relationship:
  77 * if (dentry1 < dentry2)
  78 *   dentry1->d_lock
  79 *     dentry2->d_lock
  80 */
  81int sysctl_vfs_cache_pressure __read_mostly = 100;
  82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  83
  84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  86
  87EXPORT_SYMBOL(rename_lock);
  88
  89static struct kmem_cache *dentry_cache __read_mostly;
  90
  91/*
  92 * This is the single most critical data structure when it comes
  93 * to the dcache: the hashtable for lookups. Somebody should try
  94 * to make this good - I've just made it work.
  95 *
  96 * This hash-function tries to avoid losing too many bits of hash
  97 * information, yet avoid using a prime hash-size or similar.
  98 */
  99#define D_HASHBITS     d_hash_shift
 100#define D_HASHMASK     d_hash_mask
 101
 102static unsigned int d_hash_mask __read_mostly;
 103static unsigned int d_hash_shift __read_mostly;
 104
 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
 106
 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 108                                        unsigned long hash)
 109{
 110        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
 111        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
 115/* Statistics gathering. */
 116struct dentry_stat_t dentry_stat = {
 117        .age_limit = 45,
 118};
 119
 120static DEFINE_PER_CPU(unsigned int, nr_dentry);
 121
 122#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 123static int get_nr_dentry(void)
 124{
 125        int i;
 126        int sum = 0;
 127        for_each_possible_cpu(i)
 128                sum += per_cpu(nr_dentry, i);
 129        return sum < 0 ? 0 : sum;
 130}
 131
 132int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 133                   size_t *lenp, loff_t *ppos)
 134{
 135        dentry_stat.nr_dentry = get_nr_dentry();
 136        return proc_dointvec(table, write, buffer, lenp, ppos);
 137}
 138#endif
 139
 140/*
 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 142 * The strings are both count bytes long, and count is non-zero.
 143 */
 144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 145                                const unsigned char *ct, size_t tcount)
 146{
 147        if (scount != tcount)
 148                return 1;
 149
 150        do {
 151                if (*cs != *ct)
 152                        return 1;
 153                cs++;
 154                ct++;
 155                tcount--;
 156        } while (tcount);
 157        return 0;
 158}
 159
 160static void __d_free(struct rcu_head *head)
 161{
 162        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 163
 164        WARN_ON(!list_empty(&dentry->d_alias));
 165        if (dname_external(dentry))
 166                kfree(dentry->d_name.name);
 167        kmem_cache_free(dentry_cache, dentry); 
 168}
 169
 170/*
 171 * no locks, please.
 172 */
 173static void d_free(struct dentry *dentry)
 174{
 175        BUG_ON(dentry->d_count);
 176        this_cpu_dec(nr_dentry);
 177        if (dentry->d_op && dentry->d_op->d_release)
 178                dentry->d_op->d_release(dentry);
 179
 180        /* if dentry was never visible to RCU, immediate free is OK */
 181        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 182                __d_free(&dentry->d_u.d_rcu);
 183        else
 184                call_rcu(&dentry->d_u.d_rcu, __d_free);
 185}
 186
 187/**
 188 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 189 * @dentry: the target dentry
 190 * After this call, in-progress rcu-walk path lookup will fail. This
 191 * should be called after unhashing, and after changing d_inode (if
 192 * the dentry has not already been unhashed).
 193 */
 194static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 195{
 196        assert_spin_locked(&dentry->d_lock);
 197        /* Go through a barrier */
 198        write_seqcount_barrier(&dentry->d_seq);
 199}
 200
 201/*
 202 * Release the dentry's inode, using the filesystem
 203 * d_iput() operation if defined. Dentry has no refcount
 204 * and is unhashed.
 205 */
 206static void dentry_iput(struct dentry * dentry)
 207        __releases(dentry->d_lock)
 208        __releases(dentry->d_inode->i_lock)
 209{
 210        struct inode *inode = dentry->d_inode;
 211        if (inode) {
 212                dentry->d_inode = NULL;
 213                list_del_init(&dentry->d_alias);
 214                spin_unlock(&dentry->d_lock);
 215                spin_unlock(&inode->i_lock);
 216                if (!inode->i_nlink)
 217                        fsnotify_inoderemove(inode);
 218                if (dentry->d_op && dentry->d_op->d_iput)
 219                        dentry->d_op->d_iput(dentry, inode);
 220                else
 221                        iput(inode);
 222        } else {
 223                spin_unlock(&dentry->d_lock);
 224        }
 225}
 226
 227/*
 228 * Release the dentry's inode, using the filesystem
 229 * d_iput() operation if defined. dentry remains in-use.
 230 */
 231static void dentry_unlink_inode(struct dentry * dentry)
 232        __releases(dentry->d_lock)
 233        __releases(dentry->d_inode->i_lock)
 234{
 235        struct inode *inode = dentry->d_inode;
 236        dentry->d_inode = NULL;
 237        list_del_init(&dentry->d_alias);
 238        dentry_rcuwalk_barrier(dentry);
 239        spin_unlock(&dentry->d_lock);
 240        spin_unlock(&inode->i_lock);
 241        if (!inode->i_nlink)
 242                fsnotify_inoderemove(inode);
 243        if (dentry->d_op && dentry->d_op->d_iput)
 244                dentry->d_op->d_iput(dentry, inode);
 245        else
 246                iput(inode);
 247}
 248
 249/*
 250 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 251 */
 252static void dentry_lru_add(struct dentry *dentry)
 253{
 254        if (list_empty(&dentry->d_lru)) {
 255                spin_lock(&dcache_lru_lock);
 256                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 257                dentry->d_sb->s_nr_dentry_unused++;
 258                dentry_stat.nr_unused++;
 259                spin_unlock(&dcache_lru_lock);
 260        }
 261}
 262
 263static void __dentry_lru_del(struct dentry *dentry)
 264{
 265        list_del_init(&dentry->d_lru);
 266        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 267        dentry->d_sb->s_nr_dentry_unused--;
 268        dentry_stat.nr_unused--;
 269}
 270
 271/*
 272 * Remove a dentry with references from the LRU.
 273 */
 274static void dentry_lru_del(struct dentry *dentry)
 275{
 276        if (!list_empty(&dentry->d_lru)) {
 277                spin_lock(&dcache_lru_lock);
 278                __dentry_lru_del(dentry);
 279                spin_unlock(&dcache_lru_lock);
 280        }
 281}
 282
 283/*
 284 * Remove a dentry that is unreferenced and about to be pruned
 285 * (unhashed and destroyed) from the LRU, and inform the file system.
 286 * This wrapper should be called _prior_ to unhashing a victim dentry.
 287 */
 288static void dentry_lru_prune(struct dentry *dentry)
 289{
 290        if (!list_empty(&dentry->d_lru)) {
 291                if (dentry->d_flags & DCACHE_OP_PRUNE)
 292                        dentry->d_op->d_prune(dentry);
 293
 294                spin_lock(&dcache_lru_lock);
 295                __dentry_lru_del(dentry);
 296                spin_unlock(&dcache_lru_lock);
 297        }
 298}
 299
 300static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 301{
 302        spin_lock(&dcache_lru_lock);
 303        if (list_empty(&dentry->d_lru)) {
 304                list_add_tail(&dentry->d_lru, list);
 305                dentry->d_sb->s_nr_dentry_unused++;
 306                dentry_stat.nr_unused++;
 307        } else {
 308                list_move_tail(&dentry->d_lru, list);
 309        }
 310        spin_unlock(&dcache_lru_lock);
 311}
 312
 313/**
 314 * d_kill - kill dentry and return parent
 315 * @dentry: dentry to kill
 316 * @parent: parent dentry
 317 *
 318 * The dentry must already be unhashed and removed from the LRU.
 319 *
 320 * If this is the root of the dentry tree, return NULL.
 321 *
 322 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 323 * d_kill.
 324 */
 325static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 326        __releases(dentry->d_lock)
 327        __releases(parent->d_lock)
 328        __releases(dentry->d_inode->i_lock)
 329{
 330        list_del(&dentry->d_u.d_child);
 331        /*
 332         * Inform try_to_ascend() that we are no longer attached to the
 333         * dentry tree
 334         */
 335        dentry->d_flags |= DCACHE_DISCONNECTED;
 336        if (parent)
 337                spin_unlock(&parent->d_lock);
 338        dentry_iput(dentry);
 339        /*
 340         * dentry_iput drops the locks, at which point nobody (except
 341         * transient RCU lookups) can reach this dentry.
 342         */
 343        d_free(dentry);
 344        return parent;
 345}
 346
 347/*
 348 * Unhash a dentry without inserting an RCU walk barrier or checking that
 349 * dentry->d_lock is locked.  The caller must take care of that, if
 350 * appropriate.
 351 */
 352static void __d_shrink(struct dentry *dentry)
 353{
 354        if (!d_unhashed(dentry)) {
 355                struct hlist_bl_head *b;
 356                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 357                        b = &dentry->d_sb->s_anon;
 358                else
 359                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 360
 361                hlist_bl_lock(b);
 362                __hlist_bl_del(&dentry->d_hash);
 363                dentry->d_hash.pprev = NULL;
 364                hlist_bl_unlock(b);
 365        }
 366}
 367
 368/**
 369 * d_drop - drop a dentry
 370 * @dentry: dentry to drop
 371 *
 372 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 373 * be found through a VFS lookup any more. Note that this is different from
 374 * deleting the dentry - d_delete will try to mark the dentry negative if
 375 * possible, giving a successful _negative_ lookup, while d_drop will
 376 * just make the cache lookup fail.
 377 *
 378 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 379 * reason (NFS timeouts or autofs deletes).
 380 *
 381 * __d_drop requires dentry->d_lock.
 382 */
 383void __d_drop(struct dentry *dentry)
 384{
 385        if (!d_unhashed(dentry)) {
 386                __d_shrink(dentry);
 387                dentry_rcuwalk_barrier(dentry);
 388        }
 389}
 390EXPORT_SYMBOL(__d_drop);
 391
 392void d_drop(struct dentry *dentry)
 393{
 394        spin_lock(&dentry->d_lock);
 395        __d_drop(dentry);
 396        spin_unlock(&dentry->d_lock);
 397}
 398EXPORT_SYMBOL(d_drop);
 399
 400/*
 401 * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
 402 * @dentry: dentry to drop
 403 *
 404 * This is called when we do a lookup on a placeholder dentry that needed to be
 405 * looked up.  The dentry should have been hashed in order for it to be found by
 406 * the lookup code, but now needs to be unhashed while we do the actual lookup
 407 * and clear the DCACHE_NEED_LOOKUP flag.
 408 */
 409void d_clear_need_lookup(struct dentry *dentry)
 410{
 411        spin_lock(&dentry->d_lock);
 412        __d_drop(dentry);
 413        dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
 414        spin_unlock(&dentry->d_lock);
 415}
 416EXPORT_SYMBOL(d_clear_need_lookup);
 417
 418/*
 419 * Finish off a dentry we've decided to kill.
 420 * dentry->d_lock must be held, returns with it unlocked.
 421 * If ref is non-zero, then decrement the refcount too.
 422 * Returns dentry requiring refcount drop, or NULL if we're done.
 423 */
 424static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 425        __releases(dentry->d_lock)
 426{
 427        struct inode *inode;
 428        struct dentry *parent;
 429
 430        inode = dentry->d_inode;
 431        if (inode && !spin_trylock(&inode->i_lock)) {
 432relock:
 433                spin_unlock(&dentry->d_lock);
 434                cpu_relax();
 435                return dentry; /* try again with same dentry */
 436        }
 437        if (IS_ROOT(dentry))
 438                parent = NULL;
 439        else
 440                parent = dentry->d_parent;
 441        if (parent && !spin_trylock(&parent->d_lock)) {
 442                if (inode)
 443                        spin_unlock(&inode->i_lock);
 444                goto relock;
 445        }
 446
 447        if (ref)
 448                dentry->d_count--;
 449        /*
 450         * if dentry was on the d_lru list delete it from there.
 451         * inform the fs via d_prune that this dentry is about to be
 452         * unhashed and destroyed.
 453         */
 454        dentry_lru_prune(dentry);
 455        /* if it was on the hash then remove it */
 456        __d_drop(dentry);
 457        return d_kill(dentry, parent);
 458}
 459
 460/* 
 461 * This is dput
 462 *
 463 * This is complicated by the fact that we do not want to put
 464 * dentries that are no longer on any hash chain on the unused
 465 * list: we'd much rather just get rid of them immediately.
 466 *
 467 * However, that implies that we have to traverse the dentry
 468 * tree upwards to the parents which might _also_ now be
 469 * scheduled for deletion (it may have been only waiting for
 470 * its last child to go away).
 471 *
 472 * This tail recursion is done by hand as we don't want to depend
 473 * on the compiler to always get this right (gcc generally doesn't).
 474 * Real recursion would eat up our stack space.
 475 */
 476
 477/*
 478 * dput - release a dentry
 479 * @dentry: dentry to release 
 480 *
 481 * Release a dentry. This will drop the usage count and if appropriate
 482 * call the dentry unlink method as well as removing it from the queues and
 483 * releasing its resources. If the parent dentries were scheduled for release
 484 * they too may now get deleted.
 485 */
 486void dput(struct dentry *dentry)
 487{
 488        if (!dentry)
 489                return;
 490
 491repeat:
 492        if (dentry->d_count == 1)
 493                might_sleep();
 494        spin_lock(&dentry->d_lock);
 495        BUG_ON(!dentry->d_count);
 496        if (dentry->d_count > 1) {
 497                dentry->d_count--;
 498                spin_unlock(&dentry->d_lock);
 499                return;
 500        }
 501
 502        if (dentry->d_flags & DCACHE_OP_DELETE) {
 503                if (dentry->d_op->d_delete(dentry))
 504                        goto kill_it;
 505        }
 506
 507        /* Unreachable? Get rid of it */
 508        if (d_unhashed(dentry))
 509                goto kill_it;
 510
 511        /*
 512         * If this dentry needs lookup, don't set the referenced flag so that it
 513         * is more likely to be cleaned up by the dcache shrinker in case of
 514         * memory pressure.
 515         */
 516        if (!d_need_lookup(dentry))
 517                dentry->d_flags |= DCACHE_REFERENCED;
 518        dentry_lru_add(dentry);
 519
 520        dentry->d_count--;
 521        spin_unlock(&dentry->d_lock);
 522        return;
 523
 524kill_it:
 525        dentry = dentry_kill(dentry, 1);
 526        if (dentry)
 527                goto repeat;
 528}
 529EXPORT_SYMBOL(dput);
 530
 531/**
 532 * d_invalidate - invalidate a dentry
 533 * @dentry: dentry to invalidate
 534 *
 535 * Try to invalidate the dentry if it turns out to be
 536 * possible. If there are other dentries that can be
 537 * reached through this one we can't delete it and we
 538 * return -EBUSY. On success we return 0.
 539 *
 540 * no dcache lock.
 541 */
 542 
 543int d_invalidate(struct dentry * dentry)
 544{
 545        /*
 546         * If it's already been dropped, return OK.
 547         */
 548        spin_lock(&dentry->d_lock);
 549        if (d_unhashed(dentry)) {
 550                spin_unlock(&dentry->d_lock);
 551                return 0;
 552        }
 553        /*
 554         * Check whether to do a partial shrink_dcache
 555         * to get rid of unused child entries.
 556         */
 557        if (!list_empty(&dentry->d_subdirs)) {
 558                spin_unlock(&dentry->d_lock);
 559                shrink_dcache_parent(dentry);
 560                spin_lock(&dentry->d_lock);
 561        }
 562
 563        /*
 564         * Somebody else still using it?
 565         *
 566         * If it's a directory, we can't drop it
 567         * for fear of somebody re-populating it
 568         * with children (even though dropping it
 569         * would make it unreachable from the root,
 570         * we might still populate it if it was a
 571         * working directory or similar).
 572         * We also need to leave mountpoints alone,
 573         * directory or not.
 574         */
 575        if (dentry->d_count > 1 && dentry->d_inode) {
 576                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 577                        spin_unlock(&dentry->d_lock);
 578                        return -EBUSY;
 579                }
 580        }
 581
 582        __d_drop(dentry);
 583        spin_unlock(&dentry->d_lock);
 584        return 0;
 585}
 586EXPORT_SYMBOL(d_invalidate);
 587
 588/* This must be called with d_lock held */
 589static inline void __dget_dlock(struct dentry *dentry)
 590{
 591        dentry->d_count++;
 592}
 593
 594static inline void __dget(struct dentry *dentry)
 595{
 596        spin_lock(&dentry->d_lock);
 597        __dget_dlock(dentry);
 598        spin_unlock(&dentry->d_lock);
 599}
 600
 601struct dentry *dget_parent(struct dentry *dentry)
 602{
 603        struct dentry *ret;
 604
 605repeat:
 606        /*
 607         * Don't need rcu_dereference because we re-check it was correct under
 608         * the lock.
 609         */
 610        rcu_read_lock();
 611        ret = dentry->d_parent;
 612        spin_lock(&ret->d_lock);
 613        if (unlikely(ret != dentry->d_parent)) {
 614                spin_unlock(&ret->d_lock);
 615                rcu_read_unlock();
 616                goto repeat;
 617        }
 618        rcu_read_unlock();
 619        BUG_ON(!ret->d_count);
 620        ret->d_count++;
 621        spin_unlock(&ret->d_lock);
 622        return ret;
 623}
 624EXPORT_SYMBOL(dget_parent);
 625
 626/**
 627 * d_find_alias - grab a hashed alias of inode
 628 * @inode: inode in question
 629 * @want_discon:  flag, used by d_splice_alias, to request
 630 *          that only a DISCONNECTED alias be returned.
 631 *
 632 * If inode has a hashed alias, or is a directory and has any alias,
 633 * acquire the reference to alias and return it. Otherwise return NULL.
 634 * Notice that if inode is a directory there can be only one alias and
 635 * it can be unhashed only if it has no children, or if it is the root
 636 * of a filesystem.
 637 *
 638 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 639 * any other hashed alias over that one unless @want_discon is set,
 640 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 641 */
 642static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 643{
 644        struct dentry *alias, *discon_alias;
 645
 646again:
 647        discon_alias = NULL;
 648        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 649                spin_lock(&alias->d_lock);
 650                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 651                        if (IS_ROOT(alias) &&
 652                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 653                                discon_alias = alias;
 654                        } else if (!want_discon) {
 655                                __dget_dlock(alias);
 656                                spin_unlock(&alias->d_lock);
 657                                return alias;
 658                        }
 659                }
 660                spin_unlock(&alias->d_lock);
 661        }
 662        if (discon_alias) {
 663                alias = discon_alias;
 664                spin_lock(&alias->d_lock);
 665                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 666                        if (IS_ROOT(alias) &&
 667                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 668                                __dget_dlock(alias);
 669                                spin_unlock(&alias->d_lock);
 670                                return alias;
 671                        }
 672                }
 673                spin_unlock(&alias->d_lock);
 674                goto again;
 675        }
 676        return NULL;
 677}
 678
 679struct dentry *d_find_alias(struct inode *inode)
 680{
 681        struct dentry *de = NULL;
 682
 683        if (!list_empty(&inode->i_dentry)) {
 684                spin_lock(&inode->i_lock);
 685                de = __d_find_alias(inode, 0);
 686                spin_unlock(&inode->i_lock);
 687        }
 688        return de;
 689}
 690EXPORT_SYMBOL(d_find_alias);
 691
 692/*
 693 *      Try to kill dentries associated with this inode.
 694 * WARNING: you must own a reference to inode.
 695 */
 696void d_prune_aliases(struct inode *inode)
 697{
 698        struct dentry *dentry;
 699restart:
 700        spin_lock(&inode->i_lock);
 701        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 702                spin_lock(&dentry->d_lock);
 703                if (!dentry->d_count) {
 704                        __dget_dlock(dentry);
 705                        __d_drop(dentry);
 706                        spin_unlock(&dentry->d_lock);
 707                        spin_unlock(&inode->i_lock);
 708                        dput(dentry);
 709                        goto restart;
 710                }
 711                spin_unlock(&dentry->d_lock);
 712        }
 713        spin_unlock(&inode->i_lock);
 714}
 715EXPORT_SYMBOL(d_prune_aliases);
 716
 717/*
 718 * Try to throw away a dentry - free the inode, dput the parent.
 719 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 720 * Releases dentry->d_lock.
 721 *
 722 * This may fail if locks cannot be acquired no problem, just try again.
 723 */
 724static void try_prune_one_dentry(struct dentry *dentry)
 725        __releases(dentry->d_lock)
 726{
 727        struct dentry *parent;
 728
 729        parent = dentry_kill(dentry, 0);
 730        /*
 731         * If dentry_kill returns NULL, we have nothing more to do.
 732         * if it returns the same dentry, trylocks failed. In either
 733         * case, just loop again.
 734         *
 735         * Otherwise, we need to prune ancestors too. This is necessary
 736         * to prevent quadratic behavior of shrink_dcache_parent(), but
 737         * is also expected to be beneficial in reducing dentry cache
 738         * fragmentation.
 739         */
 740        if (!parent)
 741                return;
 742        if (parent == dentry)
 743                return;
 744
 745        /* Prune ancestors. */
 746        dentry = parent;
 747        while (dentry) {
 748                spin_lock(&dentry->d_lock);
 749                if (dentry->d_count > 1) {
 750                        dentry->d_count--;
 751                        spin_unlock(&dentry->d_lock);
 752                        return;
 753                }
 754                dentry = dentry_kill(dentry, 1);
 755        }
 756}
 757
 758static void shrink_dentry_list(struct list_head *list)
 759{
 760        struct dentry *dentry;
 761
 762        rcu_read_lock();
 763        for (;;) {
 764                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 765                if (&dentry->d_lru == list)
 766                        break; /* empty */
 767                spin_lock(&dentry->d_lock);
 768                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 769                        spin_unlock(&dentry->d_lock);
 770                        continue;
 771                }
 772
 773                /*
 774                 * We found an inuse dentry which was not removed from
 775                 * the LRU because of laziness during lookup.  Do not free
 776                 * it - just keep it off the LRU list.
 777                 */
 778                if (dentry->d_count) {
 779                        dentry_lru_del(dentry);
 780                        spin_unlock(&dentry->d_lock);
 781                        continue;
 782                }
 783
 784                rcu_read_unlock();
 785
 786                try_prune_one_dentry(dentry);
 787
 788                rcu_read_lock();
 789        }
 790        rcu_read_unlock();
 791}
 792
 793/**
 794 * prune_dcache_sb - shrink the dcache
 795 * @sb: superblock
 796 * @count: number of entries to try to free
 797 *
 798 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 799 * done when we need more memory an called from the superblock shrinker
 800 * function.
 801 *
 802 * This function may fail to free any resources if all the dentries are in
 803 * use.
 804 */
 805void prune_dcache_sb(struct super_block *sb, int count)
 806{
 807        struct dentry *dentry;
 808        LIST_HEAD(referenced);
 809        LIST_HEAD(tmp);
 810
 811relock:
 812        spin_lock(&dcache_lru_lock);
 813        while (!list_empty(&sb->s_dentry_lru)) {
 814                dentry = list_entry(sb->s_dentry_lru.prev,
 815                                struct dentry, d_lru);
 816                BUG_ON(dentry->d_sb != sb);
 817
 818                if (!spin_trylock(&dentry->d_lock)) {
 819                        spin_unlock(&dcache_lru_lock);
 820                        cpu_relax();
 821                        goto relock;
 822                }
 823
 824                if (dentry->d_flags & DCACHE_REFERENCED) {
 825                        dentry->d_flags &= ~DCACHE_REFERENCED;
 826                        list_move(&dentry->d_lru, &referenced);
 827                        spin_unlock(&dentry->d_lock);
 828                } else {
 829                        list_move_tail(&dentry->d_lru, &tmp);
 830                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 831                        spin_unlock(&dentry->d_lock);
 832                        if (!--count)
 833                                break;
 834                }
 835                cond_resched_lock(&dcache_lru_lock);
 836        }
 837        if (!list_empty(&referenced))
 838                list_splice(&referenced, &sb->s_dentry_lru);
 839        spin_unlock(&dcache_lru_lock);
 840
 841        shrink_dentry_list(&tmp);
 842}
 843
 844/**
 845 * shrink_dcache_sb - shrink dcache for a superblock
 846 * @sb: superblock
 847 *
 848 * Shrink the dcache for the specified super block. This is used to free
 849 * the dcache before unmounting a file system.
 850 */
 851void shrink_dcache_sb(struct super_block *sb)
 852{
 853        LIST_HEAD(tmp);
 854
 855        spin_lock(&dcache_lru_lock);
 856        while (!list_empty(&sb->s_dentry_lru)) {
 857                list_splice_init(&sb->s_dentry_lru, &tmp);
 858                spin_unlock(&dcache_lru_lock);
 859                shrink_dentry_list(&tmp);
 860                spin_lock(&dcache_lru_lock);
 861        }
 862        spin_unlock(&dcache_lru_lock);
 863}
 864EXPORT_SYMBOL(shrink_dcache_sb);
 865
 866/*
 867 * destroy a single subtree of dentries for unmount
 868 * - see the comments on shrink_dcache_for_umount() for a description of the
 869 *   locking
 870 */
 871static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 872{
 873        struct dentry *parent;
 874
 875        BUG_ON(!IS_ROOT(dentry));
 876
 877        for (;;) {
 878                /* descend to the first leaf in the current subtree */
 879                while (!list_empty(&dentry->d_subdirs))
 880                        dentry = list_entry(dentry->d_subdirs.next,
 881                                            struct dentry, d_u.d_child);
 882
 883                /* consume the dentries from this leaf up through its parents
 884                 * until we find one with children or run out altogether */
 885                do {
 886                        struct inode *inode;
 887
 888                        /*
 889                         * remove the dentry from the lru, and inform
 890                         * the fs that this dentry is about to be
 891                         * unhashed and destroyed.
 892                         */
 893                        dentry_lru_prune(dentry);
 894                        __d_shrink(dentry);
 895
 896                        if (dentry->d_count != 0) {
 897                                printk(KERN_ERR
 898                                       "BUG: Dentry %p{i=%lx,n=%s}"
 899                                       " still in use (%d)"
 900                                       " [unmount of %s %s]\n",
 901                                       dentry,
 902                                       dentry->d_inode ?
 903                                       dentry->d_inode->i_ino : 0UL,
 904                                       dentry->d_name.name,
 905                                       dentry->d_count,
 906                                       dentry->d_sb->s_type->name,
 907                                       dentry->d_sb->s_id);
 908                                BUG();
 909                        }
 910
 911                        if (IS_ROOT(dentry)) {
 912                                parent = NULL;
 913                                list_del(&dentry->d_u.d_child);
 914                        } else {
 915                                parent = dentry->d_parent;
 916                                parent->d_count--;
 917                                list_del(&dentry->d_u.d_child);
 918                        }
 919
 920                        inode = dentry->d_inode;
 921                        if (inode) {
 922                                dentry->d_inode = NULL;
 923                                list_del_init(&dentry->d_alias);
 924                                if (dentry->d_op && dentry->d_op->d_iput)
 925                                        dentry->d_op->d_iput(dentry, inode);
 926                                else
 927                                        iput(inode);
 928                        }
 929
 930                        d_free(dentry);
 931
 932                        /* finished when we fall off the top of the tree,
 933                         * otherwise we ascend to the parent and move to the
 934                         * next sibling if there is one */
 935                        if (!parent)
 936                                return;
 937                        dentry = parent;
 938                } while (list_empty(&dentry->d_subdirs));
 939
 940                dentry = list_entry(dentry->d_subdirs.next,
 941                                    struct dentry, d_u.d_child);
 942        }
 943}
 944
 945/*
 946 * destroy the dentries attached to a superblock on unmounting
 947 * - we don't need to use dentry->d_lock because:
 948 *   - the superblock is detached from all mountings and open files, so the
 949 *     dentry trees will not be rearranged by the VFS
 950 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 951 *     any dentries belonging to this superblock that it comes across
 952 *   - the filesystem itself is no longer permitted to rearrange the dentries
 953 *     in this superblock
 954 */
 955void shrink_dcache_for_umount(struct super_block *sb)
 956{
 957        struct dentry *dentry;
 958
 959        if (down_read_trylock(&sb->s_umount))
 960                BUG();
 961
 962        dentry = sb->s_root;
 963        sb->s_root = NULL;
 964        dentry->d_count--;
 965        shrink_dcache_for_umount_subtree(dentry);
 966
 967        while (!hlist_bl_empty(&sb->s_anon)) {
 968                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 969                shrink_dcache_for_umount_subtree(dentry);
 970        }
 971}
 972
 973/*
 974 * This tries to ascend one level of parenthood, but
 975 * we can race with renaming, so we need to re-check
 976 * the parenthood after dropping the lock and check
 977 * that the sequence number still matches.
 978 */
 979static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
 980{
 981        struct dentry *new = old->d_parent;
 982
 983        rcu_read_lock();
 984        spin_unlock(&old->d_lock);
 985        spin_lock(&new->d_lock);
 986
 987        /*
 988         * might go back up the wrong parent if we have had a rename
 989         * or deletion
 990         */
 991        if (new != old->d_parent ||
 992                 (old->d_flags & DCACHE_DISCONNECTED) ||
 993                 (!locked && read_seqretry(&rename_lock, seq))) {
 994                spin_unlock(&new->d_lock);
 995                new = NULL;
 996        }
 997        rcu_read_unlock();
 998        return new;
 999}
1000
1001
1002/*
1003 * Search for at least 1 mount point in the dentry's subdirs.
1004 * We descend to the next level whenever the d_subdirs
1005 * list is non-empty and continue searching.
1006 */
1007 
1008/**
1009 * have_submounts - check for mounts over a dentry
1010 * @parent: dentry to check.
1011 *
1012 * Return true if the parent or its subdirectories contain
1013 * a mount point
1014 */
1015int have_submounts(struct dentry *parent)
1016{
1017        struct dentry *this_parent;
1018        struct list_head *next;
1019        unsigned seq;
1020        int locked = 0;
1021
1022        seq = read_seqbegin(&rename_lock);
1023again:
1024        this_parent = parent;
1025
1026        if (d_mountpoint(parent))
1027                goto positive;
1028        spin_lock(&this_parent->d_lock);
1029repeat:
1030        next = this_parent->d_subdirs.next;
1031resume:
1032        while (next != &this_parent->d_subdirs) {
1033                struct list_head *tmp = next;
1034                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1035                next = tmp->next;
1036
1037                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1038                /* Have we found a mount point ? */
1039                if (d_mountpoint(dentry)) {
1040                        spin_unlock(&dentry->d_lock);
1041                        spin_unlock(&this_parent->d_lock);
1042                        goto positive;
1043                }
1044                if (!list_empty(&dentry->d_subdirs)) {
1045                        spin_unlock(&this_parent->d_lock);
1046                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1047                        this_parent = dentry;
1048                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1049                        goto repeat;
1050                }
1051                spin_unlock(&dentry->d_lock);
1052        }
1053        /*
1054         * All done at this level ... ascend and resume the search.
1055         */
1056        if (this_parent != parent) {
1057                struct dentry *child = this_parent;
1058                this_parent = try_to_ascend(this_parent, locked, seq);
1059                if (!this_parent)
1060                        goto rename_retry;
1061                next = child->d_u.d_child.next;
1062                goto resume;
1063        }
1064        spin_unlock(&this_parent->d_lock);
1065        if (!locked && read_seqretry(&rename_lock, seq))
1066                goto rename_retry;
1067        if (locked)
1068                write_sequnlock(&rename_lock);
1069        return 0; /* No mount points found in tree */
1070positive:
1071        if (!locked && read_seqretry(&rename_lock, seq))
1072                goto rename_retry;
1073        if (locked)
1074                write_sequnlock(&rename_lock);
1075        return 1;
1076
1077rename_retry:
1078        locked = 1;
1079        write_seqlock(&rename_lock);
1080        goto again;
1081}
1082EXPORT_SYMBOL(have_submounts);
1083
1084/*
1085 * Search the dentry child list for the specified parent,
1086 * and move any unused dentries to the end of the unused
1087 * list for prune_dcache(). We descend to the next level
1088 * whenever the d_subdirs list is non-empty and continue
1089 * searching.
1090 *
1091 * It returns zero iff there are no unused children,
1092 * otherwise  it returns the number of children moved to
1093 * the end of the unused list. This may not be the total
1094 * number of unused children, because select_parent can
1095 * drop the lock and return early due to latency
1096 * constraints.
1097 */
1098static int select_parent(struct dentry *parent, struct list_head *dispose)
1099{
1100        struct dentry *this_parent;
1101        struct list_head *next;
1102        unsigned seq;
1103        int found = 0;
1104        int locked = 0;
1105
1106        seq = read_seqbegin(&rename_lock);
1107again:
1108        this_parent = parent;
1109        spin_lock(&this_parent->d_lock);
1110repeat:
1111        next = this_parent->d_subdirs.next;
1112resume:
1113        while (next != &this_parent->d_subdirs) {
1114                struct list_head *tmp = next;
1115                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1116                next = tmp->next;
1117
1118                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1119
1120                /*
1121                 * move only zero ref count dentries to the dispose list.
1122                 *
1123                 * Those which are presently on the shrink list, being processed
1124                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1125                 * loop in shrink_dcache_parent() might not make any progress
1126                 * and loop forever.
1127                 */
1128                if (dentry->d_count) {
1129                        dentry_lru_del(dentry);
1130                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1131                        dentry_lru_move_list(dentry, dispose);
1132                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1133                        found++;
1134                }
1135                /*
1136                 * We can return to the caller if we have found some (this
1137                 * ensures forward progress). We'll be coming back to find
1138                 * the rest.
1139                 */
1140                if (found && need_resched()) {
1141                        spin_unlock(&dentry->d_lock);
1142                        goto out;
1143                }
1144
1145                /*
1146                 * Descend a level if the d_subdirs list is non-empty.
1147                 */
1148                if (!list_empty(&dentry->d_subdirs)) {
1149                        spin_unlock(&this_parent->d_lock);
1150                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1151                        this_parent = dentry;
1152                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1153                        goto repeat;
1154                }
1155
1156                spin_unlock(&dentry->d_lock);
1157        }
1158        /*
1159         * All done at this level ... ascend and resume the search.
1160         */
1161        if (this_parent != parent) {
1162                struct dentry *child = this_parent;
1163                this_parent = try_to_ascend(this_parent, locked, seq);
1164                if (!this_parent)
1165                        goto rename_retry;
1166                next = child->d_u.d_child.next;
1167                goto resume;
1168        }
1169out:
1170        spin_unlock(&this_parent->d_lock);
1171        if (!locked && read_seqretry(&rename_lock, seq))
1172                goto rename_retry;
1173        if (locked)
1174                write_sequnlock(&rename_lock);
1175        return found;
1176
1177rename_retry:
1178        if (found)
1179                return found;
1180        locked = 1;
1181        write_seqlock(&rename_lock);
1182        goto again;
1183}
1184
1185/**
1186 * shrink_dcache_parent - prune dcache
1187 * @parent: parent of entries to prune
1188 *
1189 * Prune the dcache to remove unused children of the parent dentry.
1190 */
1191void shrink_dcache_parent(struct dentry * parent)
1192{
1193        LIST_HEAD(dispose);
1194        int found;
1195
1196        while ((found = select_parent(parent, &dispose)) != 0)
1197                shrink_dentry_list(&dispose);
1198}
1199EXPORT_SYMBOL(shrink_dcache_parent);
1200
1201/**
1202 * __d_alloc    -       allocate a dcache entry
1203 * @sb: filesystem it will belong to
1204 * @name: qstr of the name
1205 *
1206 * Allocates a dentry. It returns %NULL if there is insufficient memory
1207 * available. On a success the dentry is returned. The name passed in is
1208 * copied and the copy passed in may be reused after this call.
1209 */
1210 
1211struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1212{
1213        struct dentry *dentry;
1214        char *dname;
1215
1216        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1217        if (!dentry)
1218                return NULL;
1219
1220        if (name->len > DNAME_INLINE_LEN-1) {
1221                dname = kmalloc(name->len + 1, GFP_KERNEL);
1222                if (!dname) {
1223                        kmem_cache_free(dentry_cache, dentry); 
1224                        return NULL;
1225                }
1226        } else  {
1227                dname = dentry->d_iname;
1228        }       
1229        dentry->d_name.name = dname;
1230
1231        dentry->d_name.len = name->len;
1232        dentry->d_name.hash = name->hash;
1233        memcpy(dname, name->name, name->len);
1234        dname[name->len] = 0;
1235
1236        dentry->d_count = 1;
1237        dentry->d_flags = 0;
1238        spin_lock_init(&dentry->d_lock);
1239        seqcount_init(&dentry->d_seq);
1240        dentry->d_inode = NULL;
1241        dentry->d_parent = dentry;
1242        dentry->d_sb = sb;
1243        dentry->d_op = NULL;
1244        dentry->d_fsdata = NULL;
1245        INIT_HLIST_BL_NODE(&dentry->d_hash);
1246        INIT_LIST_HEAD(&dentry->d_lru);
1247        INIT_LIST_HEAD(&dentry->d_subdirs);
1248        INIT_LIST_HEAD(&dentry->d_alias);
1249        INIT_LIST_HEAD(&dentry->d_u.d_child);
1250        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1251
1252        this_cpu_inc(nr_dentry);
1253
1254        return dentry;
1255}
1256
1257/**
1258 * d_alloc      -       allocate a dcache entry
1259 * @parent: parent of entry to allocate
1260 * @name: qstr of the name
1261 *
1262 * Allocates a dentry. It returns %NULL if there is insufficient memory
1263 * available. On a success the dentry is returned. The name passed in is
1264 * copied and the copy passed in may be reused after this call.
1265 */
1266struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1267{
1268        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1269        if (!dentry)
1270                return NULL;
1271
1272        spin_lock(&parent->d_lock);
1273        /*
1274         * don't need child lock because it is not subject
1275         * to concurrency here
1276         */
1277        __dget_dlock(parent);
1278        dentry->d_parent = parent;
1279        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1280        spin_unlock(&parent->d_lock);
1281
1282        return dentry;
1283}
1284EXPORT_SYMBOL(d_alloc);
1285
1286struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1287{
1288        struct dentry *dentry = __d_alloc(sb, name);
1289        if (dentry)
1290                dentry->d_flags |= DCACHE_DISCONNECTED;
1291        return dentry;
1292}
1293EXPORT_SYMBOL(d_alloc_pseudo);
1294
1295struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1296{
1297        struct qstr q;
1298
1299        q.name = name;
1300        q.len = strlen(name);
1301        q.hash = full_name_hash(q.name, q.len);
1302        return d_alloc(parent, &q);
1303}
1304EXPORT_SYMBOL(d_alloc_name);
1305
1306void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1307{
1308        WARN_ON_ONCE(dentry->d_op);
1309        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1310                                DCACHE_OP_COMPARE       |
1311                                DCACHE_OP_REVALIDATE    |
1312                                DCACHE_OP_DELETE ));
1313        dentry->d_op = op;
1314        if (!op)
1315                return;
1316        if (op->d_hash)
1317                dentry->d_flags |= DCACHE_OP_HASH;
1318        if (op->d_compare)
1319                dentry->d_flags |= DCACHE_OP_COMPARE;
1320        if (op->d_revalidate)
1321                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1322        if (op->d_delete)
1323                dentry->d_flags |= DCACHE_OP_DELETE;
1324        if (op->d_prune)
1325                dentry->d_flags |= DCACHE_OP_PRUNE;
1326
1327}
1328EXPORT_SYMBOL(d_set_d_op);
1329
1330static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1331{
1332        spin_lock(&dentry->d_lock);
1333        if (inode) {
1334                if (unlikely(IS_AUTOMOUNT(inode)))
1335                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1336                list_add(&dentry->d_alias, &inode->i_dentry);
1337        }
1338        dentry->d_inode = inode;
1339        dentry_rcuwalk_barrier(dentry);
1340        spin_unlock(&dentry->d_lock);
1341        fsnotify_d_instantiate(dentry, inode);
1342}
1343
1344/**
1345 * d_instantiate - fill in inode information for a dentry
1346 * @entry: dentry to complete
1347 * @inode: inode to attach to this dentry
1348 *
1349 * Fill in inode information in the entry.
1350 *
1351 * This turns negative dentries into productive full members
1352 * of society.
1353 *
1354 * NOTE! This assumes that the inode count has been incremented
1355 * (or otherwise set) by the caller to indicate that it is now
1356 * in use by the dcache.
1357 */
1358 
1359void d_instantiate(struct dentry *entry, struct inode * inode)
1360{
1361        BUG_ON(!list_empty(&entry->d_alias));
1362        if (inode)
1363                spin_lock(&inode->i_lock);
1364        __d_instantiate(entry, inode);
1365        if (inode)
1366                spin_unlock(&inode->i_lock);
1367        security_d_instantiate(entry, inode);
1368}
1369EXPORT_SYMBOL(d_instantiate);
1370
1371/**
1372 * d_instantiate_unique - instantiate a non-aliased dentry
1373 * @entry: dentry to instantiate
1374 * @inode: inode to attach to this dentry
1375 *
1376 * Fill in inode information in the entry. On success, it returns NULL.
1377 * If an unhashed alias of "entry" already exists, then we return the
1378 * aliased dentry instead and drop one reference to inode.
1379 *
1380 * Note that in order to avoid conflicts with rename() etc, the caller
1381 * had better be holding the parent directory semaphore.
1382 *
1383 * This also assumes that the inode count has been incremented
1384 * (or otherwise set) by the caller to indicate that it is now
1385 * in use by the dcache.
1386 */
1387static struct dentry *__d_instantiate_unique(struct dentry *entry,
1388                                             struct inode *inode)
1389{
1390        struct dentry *alias;
1391        int len = entry->d_name.len;
1392        const char *name = entry->d_name.name;
1393        unsigned int hash = entry->d_name.hash;
1394
1395        if (!inode) {
1396                __d_instantiate(entry, NULL);
1397                return NULL;
1398        }
1399
1400        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1401                struct qstr *qstr = &alias->d_name;
1402
1403                /*
1404                 * Don't need alias->d_lock here, because aliases with
1405                 * d_parent == entry->d_parent are not subject to name or
1406                 * parent changes, because the parent inode i_mutex is held.
1407                 */
1408                if (qstr->hash != hash)
1409                        continue;
1410                if (alias->d_parent != entry->d_parent)
1411                        continue;
1412                if (dentry_cmp(qstr->name, qstr->len, name, len))
1413                        continue;
1414                __dget(alias);
1415                return alias;
1416        }
1417
1418        __d_instantiate(entry, inode);
1419        return NULL;
1420}
1421
1422struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1423{
1424        struct dentry *result;
1425
1426        BUG_ON(!list_empty(&entry->d_alias));
1427
1428        if (inode)
1429                spin_lock(&inode->i_lock);
1430        result = __d_instantiate_unique(entry, inode);
1431        if (inode)
1432                spin_unlock(&inode->i_lock);
1433
1434        if (!result) {
1435                security_d_instantiate(entry, inode);
1436                return NULL;
1437        }
1438
1439        BUG_ON(!d_unhashed(result));
1440        iput(inode);
1441        return result;
1442}
1443
1444EXPORT_SYMBOL(d_instantiate_unique);
1445
1446/**
1447 * d_alloc_root - allocate root dentry
1448 * @root_inode: inode to allocate the root for
1449 *
1450 * Allocate a root ("/") dentry for the inode given. The inode is
1451 * instantiated and returned. %NULL is returned if there is insufficient
1452 * memory or the inode passed is %NULL.
1453 */
1454 
1455struct dentry * d_alloc_root(struct inode * root_inode)
1456{
1457        struct dentry *res = NULL;
1458
1459        if (root_inode) {
1460                static const struct qstr name = { .name = "/", .len = 1 };
1461
1462                res = __d_alloc(root_inode->i_sb, &name);
1463                if (res)
1464                        d_instantiate(res, root_inode);
1465        }
1466        return res;
1467}
1468EXPORT_SYMBOL(d_alloc_root);
1469
1470struct dentry *d_make_root(struct inode *root_inode)
1471{
1472        struct dentry *res = NULL;
1473
1474        if (root_inode) {
1475                static const struct qstr name = { .name = "/", .len = 1 };
1476
1477                res = __d_alloc(root_inode->i_sb, &name);
1478                if (res)
1479                        d_instantiate(res, root_inode);
1480                else
1481                        iput(root_inode);
1482        }
1483        return res;
1484}
1485EXPORT_SYMBOL(d_make_root);
1486
1487static struct dentry * __d_find_any_alias(struct inode *inode)
1488{
1489        struct dentry *alias;
1490
1491        if (list_empty(&inode->i_dentry))
1492                return NULL;
1493        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1494        __dget(alias);
1495        return alias;
1496}
1497
1498/**
1499 * d_find_any_alias - find any alias for a given inode
1500 * @inode: inode to find an alias for
1501 *
1502 * If any aliases exist for the given inode, take and return a
1503 * reference for one of them.  If no aliases exist, return %NULL.
1504 */
1505struct dentry *d_find_any_alias(struct inode *inode)
1506{
1507        struct dentry *de;
1508
1509        spin_lock(&inode->i_lock);
1510        de = __d_find_any_alias(inode);
1511        spin_unlock(&inode->i_lock);
1512        return de;
1513}
1514EXPORT_SYMBOL(d_find_any_alias);
1515
1516/**
1517 * d_obtain_alias - find or allocate a dentry for a given inode
1518 * @inode: inode to allocate the dentry for
1519 *
1520 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1521 * similar open by handle operations.  The returned dentry may be anonymous,
1522 * or may have a full name (if the inode was already in the cache).
1523 *
1524 * When called on a directory inode, we must ensure that the inode only ever
1525 * has one dentry.  If a dentry is found, that is returned instead of
1526 * allocating a new one.
1527 *
1528 * On successful return, the reference to the inode has been transferred
1529 * to the dentry.  In case of an error the reference on the inode is released.
1530 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1531 * be passed in and will be the error will be propagate to the return value,
1532 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1533 */
1534struct dentry *d_obtain_alias(struct inode *inode)
1535{
1536        static const struct qstr anonstring = { .name = "" };
1537        struct dentry *tmp;
1538        struct dentry *res;
1539
1540        if (!inode)
1541                return ERR_PTR(-ESTALE);
1542        if (IS_ERR(inode))
1543                return ERR_CAST(inode);
1544
1545        res = d_find_any_alias(inode);
1546        if (res)
1547                goto out_iput;
1548
1549        tmp = __d_alloc(inode->i_sb, &anonstring);
1550        if (!tmp) {
1551                res = ERR_PTR(-ENOMEM);
1552                goto out_iput;
1553        }
1554
1555        spin_lock(&inode->i_lock);
1556        res = __d_find_any_alias(inode);
1557        if (res) {
1558                spin_unlock(&inode->i_lock);
1559                dput(tmp);
1560                goto out_iput;
1561        }
1562
1563        /* attach a disconnected dentry */
1564        spin_lock(&tmp->d_lock);
1565        tmp->d_inode = inode;
1566        tmp->d_flags |= DCACHE_DISCONNECTED;
1567        list_add(&tmp->d_alias, &inode->i_dentry);
1568        hlist_bl_lock(&tmp->d_sb->s_anon);
1569        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1570        hlist_bl_unlock(&tmp->d_sb->s_anon);
1571        spin_unlock(&tmp->d_lock);
1572        spin_unlock(&inode->i_lock);
1573        security_d_instantiate(tmp, inode);
1574
1575        return tmp;
1576
1577 out_iput:
1578        if (res && !IS_ERR(res))
1579                security_d_instantiate(res, inode);
1580        iput(inode);
1581        return res;
1582}
1583EXPORT_SYMBOL(d_obtain_alias);
1584
1585/**
1586 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1587 * @inode:  the inode which may have a disconnected dentry
1588 * @dentry: a negative dentry which we want to point to the inode.
1589 *
1590 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1591 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1592 * and return it, else simply d_add the inode to the dentry and return NULL.
1593 *
1594 * This is needed in the lookup routine of any filesystem that is exportable
1595 * (via knfsd) so that we can build dcache paths to directories effectively.
1596 *
1597 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1598 * is returned.  This matches the expected return value of ->lookup.
1599 *
1600 */
1601struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1602{
1603        struct dentry *new = NULL;
1604
1605        if (IS_ERR(inode))
1606                return ERR_CAST(inode);
1607
1608        if (inode && S_ISDIR(inode->i_mode)) {
1609                spin_lock(&inode->i_lock);
1610                new = __d_find_alias(inode, 1);
1611                if (new) {
1612                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1613                        spin_unlock(&inode->i_lock);
1614                        security_d_instantiate(new, inode);
1615                        d_move(new, dentry);
1616                        iput(inode);
1617                } else {
1618                        /* already taking inode->i_lock, so d_add() by hand */
1619                        __d_instantiate(dentry, inode);
1620                        spin_unlock(&inode->i_lock);
1621                        security_d_instantiate(dentry, inode);
1622                        d_rehash(dentry);
1623                }
1624        } else
1625                d_add(dentry, inode);
1626        return new;
1627}
1628EXPORT_SYMBOL(d_splice_alias);
1629
1630/**
1631 * d_add_ci - lookup or allocate new dentry with case-exact name
1632 * @inode:  the inode case-insensitive lookup has found
1633 * @dentry: the negative dentry that was passed to the parent's lookup func
1634 * @name:   the case-exact name to be associated with the returned dentry
1635 *
1636 * This is to avoid filling the dcache with case-insensitive names to the
1637 * same inode, only the actual correct case is stored in the dcache for
1638 * case-insensitive filesystems.
1639 *
1640 * For a case-insensitive lookup match and if the the case-exact dentry
1641 * already exists in in the dcache, use it and return it.
1642 *
1643 * If no entry exists with the exact case name, allocate new dentry with
1644 * the exact case, and return the spliced entry.
1645 */
1646struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1647                        struct qstr *name)
1648{
1649        int error;
1650        struct dentry *found;
1651        struct dentry *new;
1652
1653        /*
1654         * First check if a dentry matching the name already exists,
1655         * if not go ahead and create it now.
1656         */
1657        found = d_hash_and_lookup(dentry->d_parent, name);
1658        if (!found) {
1659                new = d_alloc(dentry->d_parent, name);
1660                if (!new) {
1661                        error = -ENOMEM;
1662                        goto err_out;
1663                }
1664
1665                found = d_splice_alias(inode, new);
1666                if (found) {
1667                        dput(new);
1668                        return found;
1669                }
1670                return new;
1671        }
1672
1673        /*
1674         * If a matching dentry exists, and it's not negative use it.
1675         *
1676         * Decrement the reference count to balance the iget() done
1677         * earlier on.
1678         */
1679        if (found->d_inode) {
1680                if (unlikely(found->d_inode != inode)) {
1681                        /* This can't happen because bad inodes are unhashed. */
1682                        BUG_ON(!is_bad_inode(inode));
1683                        BUG_ON(!is_bad_inode(found->d_inode));
1684                }
1685                iput(inode);
1686                return found;
1687        }
1688
1689        /*
1690         * We are going to instantiate this dentry, unhash it and clear the
1691         * lookup flag so we can do that.
1692         */
1693        if (unlikely(d_need_lookup(found)))
1694                d_clear_need_lookup(found);
1695
1696        /*
1697         * Negative dentry: instantiate it unless the inode is a directory and
1698         * already has a dentry.
1699         */
1700        new = d_splice_alias(inode, found);
1701        if (new) {
1702                dput(found);
1703                found = new;
1704        }
1705        return found;
1706
1707err_out:
1708        iput(inode);
1709        return ERR_PTR(error);
1710}
1711EXPORT_SYMBOL(d_add_ci);
1712
1713/**
1714 * __d_lookup_rcu - search for a dentry (racy, store-free)
1715 * @parent: parent dentry
1716 * @name: qstr of name we wish to find
1717 * @seq: returns d_seq value at the point where the dentry was found
1718 * @inode: returns dentry->d_inode when the inode was found valid.
1719 * Returns: dentry, or NULL
1720 *
1721 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1722 * resolution (store-free path walking) design described in
1723 * Documentation/filesystems/path-lookup.txt.
1724 *
1725 * This is not to be used outside core vfs.
1726 *
1727 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1728 * held, and rcu_read_lock held. The returned dentry must not be stored into
1729 * without taking d_lock and checking d_seq sequence count against @seq
1730 * returned here.
1731 *
1732 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1733 * function.
1734 *
1735 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1736 * the returned dentry, so long as its parent's seqlock is checked after the
1737 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1738 * is formed, giving integrity down the path walk.
1739 */
1740struct dentry *__d_lookup_rcu(const struct dentry *parent,
1741                                const struct qstr *name,
1742                                unsigned *seqp, struct inode **inode)
1743{
1744        unsigned int len = name->len;
1745        unsigned int hash = name->hash;
1746        const unsigned char *str = name->name;
1747        struct hlist_bl_head *b = d_hash(parent, hash);
1748        struct hlist_bl_node *node;
1749        struct dentry *dentry;
1750
1751        /*
1752         * Note: There is significant duplication with __d_lookup_rcu which is
1753         * required to prevent single threaded performance regressions
1754         * especially on architectures where smp_rmb (in seqcounts) are costly.
1755         * Keep the two functions in sync.
1756         */
1757
1758        /*
1759         * The hash list is protected using RCU.
1760         *
1761         * Carefully use d_seq when comparing a candidate dentry, to avoid
1762         * races with d_move().
1763         *
1764         * It is possible that concurrent renames can mess up our list
1765         * walk here and result in missing our dentry, resulting in the
1766         * false-negative result. d_lookup() protects against concurrent
1767         * renames using rename_lock seqlock.
1768         *
1769         * See Documentation/filesystems/path-lookup.txt for more details.
1770         */
1771        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1772                unsigned seq;
1773                struct inode *i;
1774                const char *tname;
1775                int tlen;
1776
1777                if (dentry->d_name.hash != hash)
1778                        continue;
1779
1780seqretry:
1781                seq = read_seqcount_begin(&dentry->d_seq);
1782                if (dentry->d_parent != parent)
1783                        continue;
1784                if (d_unhashed(dentry))
1785                        continue;
1786                tlen = dentry->d_name.len;
1787                tname = dentry->d_name.name;
1788                i = dentry->d_inode;
1789                prefetch(tname);
1790                /*
1791                 * This seqcount check is required to ensure name and
1792                 * len are loaded atomically, so as not to walk off the
1793                 * edge of memory when walking. If we could load this
1794                 * atomically some other way, we could drop this check.
1795                 */
1796                if (read_seqcount_retry(&dentry->d_seq, seq))
1797                        goto seqretry;
1798                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1799                        if (parent->d_op->d_compare(parent, *inode,
1800                                                dentry, i,
1801                                                tlen, tname, name))
1802                                continue;
1803                } else {
1804                        if (dentry_cmp(tname, tlen, str, len))
1805                                continue;
1806                }
1807                /*
1808                 * No extra seqcount check is required after the name
1809                 * compare. The caller must perform a seqcount check in
1810                 * order to do anything useful with the returned dentry
1811                 * anyway.
1812                 */
1813                *seqp = seq;
1814                *inode = i;
1815                return dentry;
1816        }
1817        return NULL;
1818}
1819
1820/**
1821 * d_lookup - search for a dentry
1822 * @parent: parent dentry
1823 * @name: qstr of name we wish to find
1824 * Returns: dentry, or NULL
1825 *
1826 * d_lookup searches the children of the parent dentry for the name in
1827 * question. If the dentry is found its reference count is incremented and the
1828 * dentry is returned. The caller must use dput to free the entry when it has
1829 * finished using it. %NULL is returned if the dentry does not exist.
1830 */
1831struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1832{
1833        struct dentry *dentry;
1834        unsigned seq;
1835
1836        do {
1837                seq = read_seqbegin(&rename_lock);
1838                dentry = __d_lookup(parent, name);
1839                if (dentry)
1840                        break;
1841        } while (read_seqretry(&rename_lock, seq));
1842        return dentry;
1843}
1844EXPORT_SYMBOL(d_lookup);
1845
1846/**
1847 * __d_lookup - search for a dentry (racy)
1848 * @parent: parent dentry
1849 * @name: qstr of name we wish to find
1850 * Returns: dentry, or NULL
1851 *
1852 * __d_lookup is like d_lookup, however it may (rarely) return a
1853 * false-negative result due to unrelated rename activity.
1854 *
1855 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1856 * however it must be used carefully, eg. with a following d_lookup in
1857 * the case of failure.
1858 *
1859 * __d_lookup callers must be commented.
1860 */
1861struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1862{
1863        unsigned int len = name->len;
1864        unsigned int hash = name->hash;
1865        const unsigned char *str = name->name;
1866        struct hlist_bl_head *b = d_hash(parent, hash);
1867        struct hlist_bl_node *node;
1868        struct dentry *found = NULL;
1869        struct dentry *dentry;
1870
1871        /*
1872         * Note: There is significant duplication with __d_lookup_rcu which is
1873         * required to prevent single threaded performance regressions
1874         * especially on architectures where smp_rmb (in seqcounts) are costly.
1875         * Keep the two functions in sync.
1876         */
1877
1878        /*
1879         * The hash list is protected using RCU.
1880         *
1881         * Take d_lock when comparing a candidate dentry, to avoid races
1882         * with d_move().
1883         *
1884         * It is possible that concurrent renames can mess up our list
1885         * walk here and result in missing our dentry, resulting in the
1886         * false-negative result. d_lookup() protects against concurrent
1887         * renames using rename_lock seqlock.
1888         *
1889         * See Documentation/filesystems/path-lookup.txt for more details.
1890         */
1891        rcu_read_lock();
1892        
1893        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1894                const char *tname;
1895                int tlen;
1896
1897                if (dentry->d_name.hash != hash)
1898                        continue;
1899
1900                spin_lock(&dentry->d_lock);
1901                if (dentry->d_parent != parent)
1902                        goto next;
1903                if (d_unhashed(dentry))
1904                        goto next;
1905
1906                /*
1907                 * It is safe to compare names since d_move() cannot
1908                 * change the qstr (protected by d_lock).
1909                 */
1910                tlen = dentry->d_name.len;
1911                tname = dentry->d_name.name;
1912                if (parent->d_flags & DCACHE_OP_COMPARE) {
1913                        if (parent->d_op->d_compare(parent, parent->d_inode,
1914                                                dentry, dentry->d_inode,
1915                                                tlen, tname, name))
1916                                goto next;
1917                } else {
1918                        if (dentry_cmp(tname, tlen, str, len))
1919                                goto next;
1920                }
1921
1922                dentry->d_count++;
1923                found = dentry;
1924                spin_unlock(&dentry->d_lock);
1925                break;
1926next:
1927                spin_unlock(&dentry->d_lock);
1928        }
1929        rcu_read_unlock();
1930
1931        return found;
1932}
1933
1934/**
1935 * d_hash_and_lookup - hash the qstr then search for a dentry
1936 * @dir: Directory to search in
1937 * @name: qstr of name we wish to find
1938 *
1939 * On hash failure or on lookup failure NULL is returned.
1940 */
1941struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1942{
1943        struct dentry *dentry = NULL;
1944
1945        /*
1946         * Check for a fs-specific hash function. Note that we must
1947         * calculate the standard hash first, as the d_op->d_hash()
1948         * routine may choose to leave the hash value unchanged.
1949         */
1950        name->hash = full_name_hash(name->name, name->len);
1951        if (dir->d_flags & DCACHE_OP_HASH) {
1952                if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1953                        goto out;
1954        }
1955        dentry = d_lookup(dir, name);
1956out:
1957        return dentry;
1958}
1959
1960/**
1961 * d_validate - verify dentry provided from insecure source (deprecated)
1962 * @dentry: The dentry alleged to be valid child of @dparent
1963 * @dparent: The parent dentry (known to be valid)
1964 *
1965 * An insecure source has sent us a dentry, here we verify it and dget() it.
1966 * This is used by ncpfs in its readdir implementation.
1967 * Zero is returned in the dentry is invalid.
1968 *
1969 * This function is slow for big directories, and deprecated, do not use it.
1970 */
1971int d_validate(struct dentry *dentry, struct dentry *dparent)
1972{
1973        struct dentry *child;
1974
1975        spin_lock(&dparent->d_lock);
1976        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1977                if (dentry == child) {
1978                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1979                        __dget_dlock(dentry);
1980                        spin_unlock(&dentry->d_lock);
1981                        spin_unlock(&dparent->d_lock);
1982                        return 1;
1983                }
1984        }
1985        spin_unlock(&dparent->d_lock);
1986
1987        return 0;
1988}
1989EXPORT_SYMBOL(d_validate);
1990
1991/*
1992 * When a file is deleted, we have two options:
1993 * - turn this dentry into a negative dentry
1994 * - unhash this dentry and free it.
1995 *
1996 * Usually, we want to just turn this into
1997 * a negative dentry, but if anybody else is
1998 * currently using the dentry or the inode
1999 * we can't do that and we fall back on removing
2000 * it from the hash queues and waiting for
2001 * it to be deleted later when it has no users
2002 */
2003 
2004/**
2005 * d_delete - delete a dentry
2006 * @dentry: The dentry to delete
2007 *
2008 * Turn the dentry into a negative dentry if possible, otherwise
2009 * remove it from the hash queues so it can be deleted later
2010 */
2011 
2012void d_delete(struct dentry * dentry)
2013{
2014        struct inode *inode;
2015        int isdir = 0;
2016        /*
2017         * Are we the only user?
2018         */
2019again:
2020        spin_lock(&dentry->d_lock);
2021        inode = dentry->d_inode;
2022        isdir = S_ISDIR(inode->i_mode);
2023        if (dentry->d_count == 1) {
2024                if (inode && !spin_trylock(&inode->i_lock)) {
2025                        spin_unlock(&dentry->d_lock);
2026                        cpu_relax();
2027                        goto again;
2028                }
2029                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2030                dentry_unlink_inode(dentry);
2031                fsnotify_nameremove(dentry, isdir);
2032                return;
2033        }
2034
2035        if (!d_unhashed(dentry))
2036                __d_drop(dentry);
2037
2038        spin_unlock(&dentry->d_lock);
2039
2040        fsnotify_nameremove(dentry, isdir);
2041}
2042EXPORT_SYMBOL(d_delete);
2043
2044static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2045{
2046        BUG_ON(!d_unhashed(entry));
2047        hlist_bl_lock(b);
2048        entry->d_flags |= DCACHE_RCUACCESS;
2049        hlist_bl_add_head_rcu(&entry->d_hash, b);
2050        hlist_bl_unlock(b);
2051}
2052
2053static void _d_rehash(struct dentry * entry)
2054{
2055        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2056}
2057
2058/**
2059 * d_rehash     - add an entry back to the hash
2060 * @entry: dentry to add to the hash
2061 *
2062 * Adds a dentry to the hash according to its name.
2063 */
2064 
2065void d_rehash(struct dentry * entry)
2066{
2067        spin_lock(&entry->d_lock);
2068        _d_rehash(entry);
2069        spin_unlock(&entry->d_lock);
2070}
2071EXPORT_SYMBOL(d_rehash);
2072
2073/**
2074 * dentry_update_name_case - update case insensitive dentry with a new name
2075 * @dentry: dentry to be updated
2076 * @name: new name
2077 *
2078 * Update a case insensitive dentry with new case of name.
2079 *
2080 * dentry must have been returned by d_lookup with name @name. Old and new
2081 * name lengths must match (ie. no d_compare which allows mismatched name
2082 * lengths).
2083 *
2084 * Parent inode i_mutex must be held over d_lookup and into this call (to
2085 * keep renames and concurrent inserts, and readdir(2) away).
2086 */
2087void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2088{
2089        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2090        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2091
2092        spin_lock(&dentry->d_lock);
2093        write_seqcount_begin(&dentry->d_seq);
2094        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2095        write_seqcount_end(&dentry->d_seq);
2096        spin_unlock(&dentry->d_lock);
2097}
2098EXPORT_SYMBOL(dentry_update_name_case);
2099
2100static void switch_names(struct dentry *dentry, struct dentry *target)
2101{
2102        if (dname_external(target)) {
2103                if (dname_external(dentry)) {
2104                        /*
2105                         * Both external: swap the pointers
2106                         */
2107                        swap(target->d_name.name, dentry->d_name.name);
2108                } else {
2109                        /*
2110                         * dentry:internal, target:external.  Steal target's
2111                         * storage and make target internal.
2112                         */
2113                        memcpy(target->d_iname, dentry->d_name.name,
2114                                        dentry->d_name.len + 1);
2115                        dentry->d_name.name = target->d_name.name;
2116                        target->d_name.name = target->d_iname;
2117                }
2118        } else {
2119                if (dname_external(dentry)) {
2120                        /*
2121                         * dentry:external, target:internal.  Give dentry's
2122                         * storage to target and make dentry internal
2123                         */
2124                        memcpy(dentry->d_iname, target->d_name.name,
2125                                        target->d_name.len + 1);
2126                        target->d_name.name = dentry->d_name.name;
2127                        dentry->d_name.name = dentry->d_iname;
2128                } else {
2129                        /*
2130                         * Both are internal.  Just copy target to dentry
2131                         */
2132                        memcpy(dentry->d_iname, target->d_name.name,
2133                                        target->d_name.len + 1);
2134                        dentry->d_name.len = target->d_name.len;
2135                        return;
2136                }
2137        }
2138        swap(dentry->d_name.len, target->d_name.len);
2139}
2140
2141static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2142{
2143        /*
2144         * XXXX: do we really need to take target->d_lock?
2145         */
2146        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2147                spin_lock(&target->d_parent->d_lock);
2148        else {
2149                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2150                        spin_lock(&dentry->d_parent->d_lock);
2151                        spin_lock_nested(&target->d_parent->d_lock,
2152                                                DENTRY_D_LOCK_NESTED);
2153                } else {
2154                        spin_lock(&target->d_parent->d_lock);
2155                        spin_lock_nested(&dentry->d_parent->d_lock,
2156                                                DENTRY_D_LOCK_NESTED);
2157                }
2158        }
2159        if (target < dentry) {
2160                spin_lock_nested(&target->d_lock, 2);
2161                spin_lock_nested(&dentry->d_lock, 3);
2162        } else {
2163                spin_lock_nested(&dentry->d_lock, 2);
2164                spin_lock_nested(&target->d_lock, 3);
2165        }
2166}
2167
2168static void dentry_unlock_parents_for_move(struct dentry *dentry,
2169                                        struct dentry *target)
2170{
2171        if (target->d_parent != dentry->d_parent)
2172                spin_unlock(&dentry->d_parent->d_lock);
2173        if (target->d_parent != target)
2174                spin_unlock(&target->d_parent->d_lock);
2175}
2176
2177/*
2178 * When switching names, the actual string doesn't strictly have to
2179 * be preserved in the target - because we're dropping the target
2180 * anyway. As such, we can just do a simple memcpy() to copy over
2181 * the new name before we switch.
2182 *
2183 * Note that we have to be a lot more careful about getting the hash
2184 * switched - we have to switch the hash value properly even if it
2185 * then no longer matches the actual (corrupted) string of the target.
2186 * The hash value has to match the hash queue that the dentry is on..
2187 */
2188/*
2189 * __d_move - move a dentry
2190 * @dentry: entry to move
2191 * @target: new dentry
2192 *
2193 * Update the dcache to reflect the move of a file name. Negative
2194 * dcache entries should not be moved in this way. Caller must hold
2195 * rename_lock, the i_mutex of the source and target directories,
2196 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2197 */
2198static void __d_move(struct dentry * dentry, struct dentry * target)
2199{
2200        if (!dentry->d_inode)
2201                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2202
2203        BUG_ON(d_ancestor(dentry, target));
2204        BUG_ON(d_ancestor(target, dentry));
2205
2206        dentry_lock_for_move(dentry, target);
2207
2208        write_seqcount_begin(&dentry->d_seq);
2209        write_seqcount_begin(&target->d_seq);
2210
2211        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2212
2213        /*
2214         * Move the dentry to the target hash queue. Don't bother checking
2215         * for the same hash queue because of how unlikely it is.
2216         */
2217        __d_drop(dentry);
2218        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2219
2220        /* Unhash the target: dput() will then get rid of it */
2221        __d_drop(target);
2222
2223        list_del(&dentry->d_u.d_child);
2224        list_del(&target->d_u.d_child);
2225
2226        /* Switch the names.. */
2227        switch_names(dentry, target);
2228        swap(dentry->d_name.hash, target->d_name.hash);
2229
2230        /* ... and switch the parents */
2231        if (IS_ROOT(dentry)) {
2232                dentry->d_parent = target->d_parent;
2233                target->d_parent = target;
2234                INIT_LIST_HEAD(&target->d_u.d_child);
2235        } else {
2236                swap(dentry->d_parent, target->d_parent);
2237
2238                /* And add them back to the (new) parent lists */
2239                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2240        }
2241
2242        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2243
2244        write_seqcount_end(&target->d_seq);
2245        write_seqcount_end(&dentry->d_seq);
2246
2247        dentry_unlock_parents_for_move(dentry, target);
2248        spin_unlock(&target->d_lock);
2249        fsnotify_d_move(dentry);
2250        spin_unlock(&dentry->d_lock);
2251}
2252
2253/*
2254 * d_move - move a dentry
2255 * @dentry: entry to move
2256 * @target: new dentry
2257 *
2258 * Update the dcache to reflect the move of a file name. Negative
2259 * dcache entries should not be moved in this way. See the locking
2260 * requirements for __d_move.
2261 */
2262void d_move(struct dentry *dentry, struct dentry *target)
2263{
2264        write_seqlock(&rename_lock);
2265        __d_move(dentry, target);
2266        write_sequnlock(&rename_lock);
2267}
2268EXPORT_SYMBOL(d_move);
2269
2270/**
2271 * d_ancestor - search for an ancestor
2272 * @p1: ancestor dentry
2273 * @p2: child dentry
2274 *
2275 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2276 * an ancestor of p2, else NULL.
2277 */
2278struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2279{
2280        struct dentry *p;
2281
2282        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2283                if (p->d_parent == p1)
2284                        return p;
2285        }
2286        return NULL;
2287}
2288
2289/*
2290 * This helper attempts to cope with remotely renamed directories
2291 *
2292 * It assumes that the caller is already holding
2293 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2294 *
2295 * Note: If ever the locking in lock_rename() changes, then please
2296 * remember to update this too...
2297 */
2298static struct dentry *__d_unalias(struct inode *inode,
2299                struct dentry *dentry, struct dentry *alias)
2300{
2301        struct mutex *m1 = NULL, *m2 = NULL;
2302        struct dentry *ret;
2303
2304        /* If alias and dentry share a parent, then no extra locks required */
2305        if (alias->d_parent == dentry->d_parent)
2306                goto out_unalias;
2307
2308        /* See lock_rename() */
2309        ret = ERR_PTR(-EBUSY);
2310        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2311                goto out_err;
2312        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2313        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2314                goto out_err;
2315        m2 = &alias->d_parent->d_inode->i_mutex;
2316out_unalias:
2317        __d_move(alias, dentry);
2318        ret = alias;
2319out_err:
2320        spin_unlock(&inode->i_lock);
2321        if (m2)
2322                mutex_unlock(m2);
2323        if (m1)
2324                mutex_unlock(m1);
2325        return ret;
2326}
2327
2328/*
2329 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2330 * named dentry in place of the dentry to be replaced.
2331 * returns with anon->d_lock held!
2332 */
2333static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2334{
2335        struct dentry *dparent, *aparent;
2336
2337        dentry_lock_for_move(anon, dentry);
2338
2339        write_seqcount_begin(&dentry->d_seq);
2340        write_seqcount_begin(&anon->d_seq);
2341
2342        dparent = dentry->d_parent;
2343        aparent = anon->d_parent;
2344
2345        switch_names(dentry, anon);
2346        swap(dentry->d_name.hash, anon->d_name.hash);
2347
2348        dentry->d_parent = (aparent == anon) ? dentry : aparent;
2349        list_del(&dentry->d_u.d_child);
2350        if (!IS_ROOT(dentry))
2351                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2352        else
2353                INIT_LIST_HEAD(&dentry->d_u.d_child);
2354
2355        anon->d_parent = (dparent == dentry) ? anon : dparent;
2356        list_del(&anon->d_u.d_child);
2357        if (!IS_ROOT(anon))
2358                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2359        else
2360                INIT_LIST_HEAD(&anon->d_u.d_child);
2361
2362        write_seqcount_end(&dentry->d_seq);
2363        write_seqcount_end(&anon->d_seq);
2364
2365        dentry_unlock_parents_for_move(anon, dentry);
2366        spin_unlock(&dentry->d_lock);
2367
2368        /* anon->d_lock still locked, returns locked */
2369        anon->d_flags &= ~DCACHE_DISCONNECTED;
2370}
2371
2372/**
2373 * d_materialise_unique - introduce an inode into the tree
2374 * @dentry: candidate dentry
2375 * @inode: inode to bind to the dentry, to which aliases may be attached
2376 *
2377 * Introduces an dentry into the tree, substituting an extant disconnected
2378 * root directory alias in its place if there is one. Caller must hold the
2379 * i_mutex of the parent directory.
2380 */
2381struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2382{
2383        struct dentry *actual;
2384
2385        BUG_ON(!d_unhashed(dentry));
2386
2387        if (!inode) {
2388                actual = dentry;
2389                __d_instantiate(dentry, NULL);
2390                d_rehash(actual);
2391                goto out_nolock;
2392        }
2393
2394        spin_lock(&inode->i_lock);
2395
2396        if (S_ISDIR(inode->i_mode)) {
2397                struct dentry *alias;
2398
2399                /* Does an aliased dentry already exist? */
2400                alias = __d_find_alias(inode, 0);
2401                if (alias) {
2402                        actual = alias;
2403                        write_seqlock(&rename_lock);
2404
2405                        if (d_ancestor(alias, dentry)) {
2406                                /* Check for loops */
2407                                actual = ERR_PTR(-ELOOP);
2408                                spin_unlock(&inode->i_lock);
2409                        } else if (IS_ROOT(alias)) {
2410                                /* Is this an anonymous mountpoint that we
2411                                 * could splice into our tree? */
2412                                __d_materialise_dentry(dentry, alias);
2413                                write_sequnlock(&rename_lock);
2414                                __d_drop(alias);
2415                                goto found;
2416                        } else {
2417                                /* Nope, but we must(!) avoid directory
2418                                 * aliasing. This drops inode->i_lock */
2419                                actual = __d_unalias(inode, dentry, alias);
2420                        }
2421                        write_sequnlock(&rename_lock);
2422                        if (IS_ERR(actual)) {
2423                                if (PTR_ERR(actual) == -ELOOP)
2424                                        pr_warn_ratelimited(
2425                                                "VFS: Lookup of '%s' in %s %s"
2426                                                " would have caused loop\n",
2427                                                dentry->d_name.name,
2428                                                inode->i_sb->s_type->name,
2429                                                inode->i_sb->s_id);
2430                                dput(alias);
2431                        }
2432                        goto out_nolock;
2433                }
2434        }
2435
2436        /* Add a unique reference */
2437        actual = __d_instantiate_unique(dentry, inode);
2438        if (!actual)
2439                actual = dentry;
2440        else
2441                BUG_ON(!d_unhashed(actual));
2442
2443        spin_lock(&actual->d_lock);
2444found:
2445        _d_rehash(actual);
2446        spin_unlock(&actual->d_lock);
2447        spin_unlock(&inode->i_lock);
2448out_nolock:
2449        if (actual == dentry) {
2450                security_d_instantiate(dentry, inode);
2451                return NULL;
2452        }
2453
2454        iput(inode);
2455        return actual;
2456}
2457EXPORT_SYMBOL_GPL(d_materialise_unique);
2458
2459static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2460{
2461        *buflen -= namelen;
2462        if (*buflen < 0)
2463                return -ENAMETOOLONG;
2464        *buffer -= namelen;
2465        memcpy(*buffer, str, namelen);
2466        return 0;
2467}
2468
2469static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2470{
2471        return prepend(buffer, buflen, name->name, name->len);
2472}
2473
2474/**
2475 * prepend_path - Prepend path string to a buffer
2476 * @path: the dentry/vfsmount to report
2477 * @root: root vfsmnt/dentry
2478 * @buffer: pointer to the end of the buffer
2479 * @buflen: pointer to buffer length
2480 *
2481 * Caller holds the rename_lock.
2482 */
2483static int prepend_path(const struct path *path,
2484                        const struct path *root,
2485                        char **buffer, int *buflen)
2486{
2487        struct dentry *dentry = path->dentry;
2488        struct vfsmount *vfsmnt = path->mnt;
2489        struct mount *mnt = real_mount(vfsmnt);
2490        bool slash = false;
2491        int error = 0;
2492
2493        br_read_lock(vfsmount_lock);
2494        while (dentry != root->dentry || vfsmnt != root->mnt) {
2495                struct dentry * parent;
2496
2497                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2498                        /* Global root? */
2499                        if (!mnt_has_parent(mnt))
2500                                goto global_root;
2501                        dentry = mnt->mnt_mountpoint;
2502                        mnt = mnt->mnt_parent;
2503                        vfsmnt = &mnt->mnt;
2504                        continue;
2505                }
2506                parent = dentry->d_parent;
2507                prefetch(parent);
2508                spin_lock(&dentry->d_lock);
2509                error = prepend_name(buffer, buflen, &dentry->d_name);
2510                spin_unlock(&dentry->d_lock);
2511                if (!error)
2512                        error = prepend(buffer, buflen, "/", 1);
2513                if (error)
2514                        break;
2515
2516                slash = true;
2517                dentry = parent;
2518        }
2519
2520        if (!error && !slash)
2521                error = prepend(buffer, buflen, "/", 1);
2522
2523out:
2524        br_read_unlock(vfsmount_lock);
2525        return error;
2526
2527global_root:
2528        /*
2529         * Filesystems needing to implement special "root names"
2530         * should do so with ->d_dname()
2531         */
2532        if (IS_ROOT(dentry) &&
2533            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2534                WARN(1, "Root dentry has weird name <%.*s>\n",
2535                     (int) dentry->d_name.len, dentry->d_name.name);
2536        }
2537        if (!slash)
2538                error = prepend(buffer, buflen, "/", 1);
2539        if (!error)
2540                error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
2541        goto out;
2542}
2543
2544/**
2545 * __d_path - return the path of a dentry
2546 * @path: the dentry/vfsmount to report
2547 * @root: root vfsmnt/dentry
2548 * @buf: buffer to return value in
2549 * @buflen: buffer length
2550 *
2551 * Convert a dentry into an ASCII path name.
2552 *
2553 * Returns a pointer into the buffer or an error code if the
2554 * path was too long.
2555 *
2556 * "buflen" should be positive.
2557 *
2558 * If the path is not reachable from the supplied root, return %NULL.
2559 */
2560char *__d_path(const struct path *path,
2561               const struct path *root,
2562               char *buf, int buflen)
2563{
2564        char *res = buf + buflen;
2565        int error;
2566
2567        prepend(&res, &buflen, "\0", 1);
2568        write_seqlock(&rename_lock);
2569        error = prepend_path(path, root, &res, &buflen);
2570        write_sequnlock(&rename_lock);
2571
2572        if (error < 0)
2573                return ERR_PTR(error);
2574        if (error > 0)
2575                return NULL;
2576        return res;
2577}
2578
2579char *d_absolute_path(const struct path *path,
2580               char *buf, int buflen)
2581{
2582        struct path root = {};
2583        char *res = buf + buflen;
2584        int error;
2585
2586        prepend(&res, &buflen, "\0", 1);
2587        write_seqlock(&rename_lock);
2588        error = prepend_path(path, &root, &res, &buflen);
2589        write_sequnlock(&rename_lock);
2590
2591        if (error > 1)
2592                error = -EINVAL;
2593        if (error < 0)
2594                return ERR_PTR(error);
2595        return res;
2596}
2597
2598/*
2599 * same as __d_path but appends "(deleted)" for unlinked files.
2600 */
2601static int path_with_deleted(const struct path *path,
2602                             const struct path *root,
2603                             char **buf, int *buflen)
2604{
2605        prepend(buf, buflen, "\0", 1);
2606        if (d_unlinked(path->dentry)) {
2607                int error = prepend(buf, buflen, " (deleted)", 10);
2608                if (error)
2609                        return error;
2610        }
2611
2612        return prepend_path(path, root, buf, buflen);
2613}
2614
2615static int prepend_unreachable(char **buffer, int *buflen)
2616{
2617        return prepend(buffer, buflen, "(unreachable)", 13);
2618}
2619
2620/**
2621 * d_path - return the path of a dentry
2622 * @path: path to report
2623 * @buf: buffer to return value in
2624 * @buflen: buffer length
2625 *
2626 * Convert a dentry into an ASCII path name. If the entry has been deleted
2627 * the string " (deleted)" is appended. Note that this is ambiguous.
2628 *
2629 * Returns a pointer into the buffer or an error code if the path was
2630 * too long. Note: Callers should use the returned pointer, not the passed
2631 * in buffer, to use the name! The implementation often starts at an offset
2632 * into the buffer, and may leave 0 bytes at the start.
2633 *
2634 * "buflen" should be positive.
2635 */
2636char *d_path(const struct path *path, char *buf, int buflen)
2637{
2638        char *res = buf + buflen;
2639        struct path root;
2640        int error;
2641
2642        /*
2643         * We have various synthetic filesystems that never get mounted.  On
2644         * these filesystems dentries are never used for lookup purposes, and
2645         * thus don't need to be hashed.  They also don't need a name until a
2646         * user wants to identify the object in /proc/pid/fd/.  The little hack
2647         * below allows us to generate a name for these objects on demand:
2648         */
2649        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2650                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2651
2652        get_fs_root(current->fs, &root);
2653        write_seqlock(&rename_lock);
2654        error = path_with_deleted(path, &root, &res, &buflen);
2655        if (error < 0)
2656                res = ERR_PTR(error);
2657        write_sequnlock(&rename_lock);
2658        path_put(&root);
2659        return res;
2660}
2661EXPORT_SYMBOL(d_path);
2662
2663/**
2664 * d_path_with_unreachable - return the path of a dentry
2665 * @path: path to report
2666 * @buf: buffer to return value in
2667 * @buflen: buffer length
2668 *
2669 * The difference from d_path() is that this prepends "(unreachable)"
2670 * to paths which are unreachable from the current process' root.
2671 */
2672char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2673{
2674        char *res = buf + buflen;
2675        struct path root;
2676        int error;
2677
2678        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2679                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2680
2681        get_fs_root(current->fs, &root);
2682        write_seqlock(&rename_lock);
2683        error = path_with_deleted(path, &root, &res, &buflen);
2684        if (error > 0)
2685                error = prepend_unreachable(&res, &buflen);
2686        write_sequnlock(&rename_lock);
2687        path_put(&root);
2688        if (error)
2689                res =  ERR_PTR(error);
2690
2691        return res;
2692}
2693
2694/*
2695 * Helper function for dentry_operations.d_dname() members
2696 */
2697char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2698                        const char *fmt, ...)
2699{
2700        va_list args;
2701        char temp[64];
2702        int sz;
2703
2704        va_start(args, fmt);
2705        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2706        va_end(args);
2707
2708        if (sz > sizeof(temp) || sz > buflen)
2709                return ERR_PTR(-ENAMETOOLONG);
2710
2711        buffer += buflen - sz;
2712        return memcpy(buffer, temp, sz);
2713}
2714
2715/*
2716 * Write full pathname from the root of the filesystem into the buffer.
2717 */
2718static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2719{
2720        char *end = buf + buflen;
2721        char *retval;
2722
2723        prepend(&end, &buflen, "\0", 1);
2724        if (buflen < 1)
2725                goto Elong;
2726        /* Get '/' right */
2727        retval = end-1;
2728        *retval = '/';
2729
2730        while (!IS_ROOT(dentry)) {
2731                struct dentry *parent = dentry->d_parent;
2732                int error;
2733
2734                prefetch(parent);
2735                spin_lock(&dentry->d_lock);
2736                error = prepend_name(&end, &buflen, &dentry->d_name);
2737                spin_unlock(&dentry->d_lock);
2738                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2739                        goto Elong;
2740
2741                retval = end;
2742                dentry = parent;
2743        }
2744        return retval;
2745Elong:
2746        return ERR_PTR(-ENAMETOOLONG);
2747}
2748
2749char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2750{
2751        char *retval;
2752
2753        write_seqlock(&rename_lock);
2754        retval = __dentry_path(dentry, buf, buflen);
2755        write_sequnlock(&rename_lock);
2756
2757        return retval;
2758}
2759EXPORT_SYMBOL(dentry_path_raw);
2760
2761char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2762{
2763        char *p = NULL;
2764        char *retval;
2765
2766        write_seqlock(&rename_lock);
2767        if (d_unlinked(dentry)) {
2768                p = buf + buflen;
2769                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2770                        goto Elong;
2771                buflen++;
2772        }
2773        retval = __dentry_path(dentry, buf, buflen);
2774        write_sequnlock(&rename_lock);
2775        if (!IS_ERR(retval) && p)
2776                *p = '/';       /* restore '/' overriden with '\0' */
2777        return retval;
2778Elong:
2779        return ERR_PTR(-ENAMETOOLONG);
2780}
2781
2782/*
2783 * NOTE! The user-level library version returns a
2784 * character pointer. The kernel system call just
2785 * returns the length of the buffer filled (which
2786 * includes the ending '\0' character), or a negative
2787 * error value. So libc would do something like
2788 *
2789 *      char *getcwd(char * buf, size_t size)
2790 *      {
2791 *              int retval;
2792 *
2793 *              retval = sys_getcwd(buf, size);
2794 *              if (retval >= 0)
2795 *                      return buf;
2796 *              errno = -retval;
2797 *              return NULL;
2798 *      }
2799 */
2800SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2801{
2802        int error;
2803        struct path pwd, root;
2804        char *page = (char *) __get_free_page(GFP_USER);
2805
2806        if (!page)
2807                return -ENOMEM;
2808
2809        get_fs_root_and_pwd(current->fs, &root, &pwd);
2810
2811        error = -ENOENT;
2812        write_seqlock(&rename_lock);
2813        if (!d_unlinked(pwd.dentry)) {
2814                unsigned long len;
2815                char *cwd = page + PAGE_SIZE;
2816                int buflen = PAGE_SIZE;
2817
2818                prepend(&cwd, &buflen, "\0", 1);
2819                error = prepend_path(&pwd, &root, &cwd, &buflen);
2820                write_sequnlock(&rename_lock);
2821
2822                if (error < 0)
2823                        goto out;
2824
2825                /* Unreachable from current root */
2826                if (error > 0) {
2827                        error = prepend_unreachable(&cwd, &buflen);
2828                        if (error)
2829                                goto out;
2830                }
2831
2832                error = -ERANGE;
2833                len = PAGE_SIZE + page - cwd;
2834                if (len <= size) {
2835                        error = len;
2836                        if (copy_to_user(buf, cwd, len))
2837                                error = -EFAULT;
2838                }
2839        } else {
2840                write_sequnlock(&rename_lock);
2841        }
2842
2843out:
2844        path_put(&pwd);
2845        path_put(&root);
2846        free_page((unsigned long) page);
2847        return error;
2848}
2849
2850/*
2851 * Test whether new_dentry is a subdirectory of old_dentry.
2852 *
2853 * Trivially implemented using the dcache structure
2854 */
2855
2856/**
2857 * is_subdir - is new dentry a subdirectory of old_dentry
2858 * @new_dentry: new dentry
2859 * @old_dentry: old dentry
2860 *
2861 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2862 * Returns 0 otherwise.
2863 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2864 */
2865  
2866int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2867{
2868        int result;
2869        unsigned seq;
2870
2871        if (new_dentry == old_dentry)
2872                return 1;
2873
2874        do {
2875                /* for restarting inner loop in case of seq retry */
2876                seq = read_seqbegin(&rename_lock);
2877                /*
2878                 * Need rcu_readlock to protect against the d_parent trashing
2879                 * due to d_move
2880                 */
2881                rcu_read_lock();
2882                if (d_ancestor(old_dentry, new_dentry))
2883                        result = 1;
2884                else
2885                        result = 0;
2886                rcu_read_unlock();
2887        } while (read_seqretry(&rename_lock, seq));
2888
2889        return result;
2890}
2891
2892void d_genocide(struct dentry *root)
2893{
2894        struct dentry *this_parent;
2895        struct list_head *next;
2896        unsigned seq;
2897        int locked = 0;
2898
2899        seq = read_seqbegin(&rename_lock);
2900again:
2901        this_parent = root;
2902        spin_lock(&this_parent->d_lock);
2903repeat:
2904        next = this_parent->d_subdirs.next;
2905resume:
2906        while (next != &this_parent->d_subdirs) {
2907                struct list_head *tmp = next;
2908                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2909                next = tmp->next;
2910
2911                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2912                if (d_unhashed(dentry) || !dentry->d_inode) {
2913                        spin_unlock(&dentry->d_lock);
2914                        continue;
2915                }
2916                if (!list_empty(&dentry->d_subdirs)) {
2917                        spin_unlock(&this_parent->d_lock);
2918                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2919                        this_parent = dentry;
2920                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2921                        goto repeat;
2922                }
2923                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2924                        dentry->d_flags |= DCACHE_GENOCIDE;
2925                        dentry->d_count--;
2926                }
2927                spin_unlock(&dentry->d_lock);
2928        }
2929        if (this_parent != root) {
2930                struct dentry *child = this_parent;
2931                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2932                        this_parent->d_flags |= DCACHE_GENOCIDE;
2933                        this_parent->d_count--;
2934                }
2935                this_parent = try_to_ascend(this_parent, locked, seq);
2936                if (!this_parent)
2937                        goto rename_retry;
2938                next = child->d_u.d_child.next;
2939                goto resume;
2940        }
2941        spin_unlock(&this_parent->d_lock);
2942        if (!locked && read_seqretry(&rename_lock, seq))
2943                goto rename_retry;
2944        if (locked)
2945                write_sequnlock(&rename_lock);
2946        return;
2947
2948rename_retry:
2949        locked = 1;
2950        write_seqlock(&rename_lock);
2951        goto again;
2952}
2953
2954/**
2955 * find_inode_number - check for dentry with name
2956 * @dir: directory to check
2957 * @name: Name to find.
2958 *
2959 * Check whether a dentry already exists for the given name,
2960 * and return the inode number if it has an inode. Otherwise
2961 * 0 is returned.
2962 *
2963 * This routine is used to post-process directory listings for
2964 * filesystems using synthetic inode numbers, and is necessary
2965 * to keep getcwd() working.
2966 */
2967 
2968ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2969{
2970        struct dentry * dentry;
2971        ino_t ino = 0;
2972
2973        dentry = d_hash_and_lookup(dir, name);
2974        if (dentry) {
2975                if (dentry->d_inode)
2976                        ino = dentry->d_inode->i_ino;
2977                dput(dentry);
2978        }
2979        return ino;
2980}
2981EXPORT_SYMBOL(find_inode_number);
2982
2983static __initdata unsigned long dhash_entries;
2984static int __init set_dhash_entries(char *str)
2985{
2986        if (!str)
2987                return 0;
2988        dhash_entries = simple_strtoul(str, &str, 0);
2989        return 1;
2990}
2991__setup("dhash_entries=", set_dhash_entries);
2992
2993static void __init dcache_init_early(void)
2994{
2995        unsigned int loop;
2996
2997        /* If hashes are distributed across NUMA nodes, defer
2998         * hash allocation until vmalloc space is available.
2999         */
3000        if (hashdist)
3001                return;
3002
3003        dentry_hashtable =
3004                alloc_large_system_hash("Dentry cache",
3005                                        sizeof(struct hlist_bl_head),
3006                                        dhash_entries,
3007                                        13,
3008                                        HASH_EARLY,
3009                                        &d_hash_shift,
3010                                        &d_hash_mask,
3011                                        0);
3012
3013        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3014                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3015}
3016
3017static void __init dcache_init(void)
3018{
3019        unsigned int loop;
3020
3021        /* 
3022         * A constructor could be added for stable state like the lists,
3023         * but it is probably not worth it because of the cache nature
3024         * of the dcache. 
3025         */
3026        dentry_cache = KMEM_CACHE(dentry,
3027                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3028
3029        /* Hash may have been set up in dcache_init_early */
3030        if (!hashdist)
3031                return;
3032
3033        dentry_hashtable =
3034                alloc_large_system_hash("Dentry cache",
3035                                        sizeof(struct hlist_bl_head),
3036                                        dhash_entries,
3037                                        13,
3038                                        0,
3039                                        &d_hash_shift,
3040                                        &d_hash_mask,
3041                                        0);
3042
3043        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3044                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3045}
3046
3047/* SLAB cache for __getname() consumers */
3048struct kmem_cache *names_cachep __read_mostly;
3049EXPORT_SYMBOL(names_cachep);
3050
3051EXPORT_SYMBOL(d_genocide);
3052
3053void __init vfs_caches_init_early(void)
3054{
3055        dcache_init_early();
3056        inode_init_early();
3057}
3058
3059void __init vfs_caches_init(unsigned long mempages)
3060{
3061        unsigned long reserve;
3062
3063        /* Base hash sizes on available memory, with a reserve equal to
3064           150% of current kernel size */
3065
3066        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3067        mempages -= reserve;
3068
3069        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3070                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3071
3072        dcache_init();
3073        inode_init();
3074        files_init(mempages);
3075        mnt_init();
3076        bdev_cache_init();
3077        chrdev_init();
3078}
3079