linux-old/fs/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/namei.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 * Some corrections by tytso.
   9 */
  10
  11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12 * lookup logic.
  13 */
  14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15 */
  16
  17#include <linux/init.h>
  18#include <linux/slab.h>
  19#include <linux/fs.h>
  20#include <linux/quotaops.h>
  21#include <linux/pagemap.h>
  22#include <linux/dnotify.h>
  23#include <linux/smp_lock.h>
  24#include <linux/personality.h>
  25
  26#include <asm/namei.h>
  27#include <asm/uaccess.h>
  28
  29#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])  /* table lookup: (x)&O_ACCMODE indexes the bytes 0, 4, 2, 6; presumably these are MAY_* permission bits -- confirm */
  30
  31/* [Feb-1997 T. Schoebel-Theuer]
  32 * Fundamental changes in the pathname lookup mechanisms (namei)
  33 * were necessary because of omirr.  The reason is that omirr needs
  34 * to know the _real_ pathname, not the user-supplied one, in case
  35 * of symlinks (and also when transname replacements occur).
  36 *
  37 * The new code replaces the old recursive symlink resolution with
  38 * an iterative one (in case of non-nested symlink chains).  It does
  39 * this with calls to <fs>_follow_link().
  40 * As a side effect, dir_namei(), _namei() and follow_link() are now 
  41 * replaced with a single function lookup_dentry() that can handle all 
  42 * the special cases of the former code.
  43 *
  44 * With the new dcache, the pathname is stored at each inode, at least as
  45 * long as the refcount of the inode is positive.  As a side effect, the
  46 * size of the dcache depends on the inode cache and thus is dynamic.
  47 *
  48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  49 * resolution to correspond with current state of the code.
  50 *
  51 * Note that the symlink resolution is not *completely* iterative.
  52 * There is still a significant amount of tail- and mid- recursion in
  53 * the algorithm.  Also, note that <fs>_readlink() is not used in
  54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  55 * may return different results than <fs>_follow_link().  Many virtual
  56 * filesystems (including /proc) exhibit this behavior.
  57 */
  58
  59/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  60 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  61 * and the name already exists in form of a symlink, try to create the new
  62 * name indicated by the symlink. The old code always complained that the
  63 * name already exists, due to not following the symlink even if its target
  64 * is nonexistent.  The new semantics affects also mknod() and link() when
  65 * the name is a symlink pointing to a non-existent name.
  66 *
  67 * I don't know which semantics is the right one, since I have no access
  68 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  69 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  70 * "old" one. Personally, I think the new semantics is much more logical.
  71 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  72 * file does succeed in both HP-UX and SunOS, but not in Solaris
  73 * and in the old Linux semantics.
  74 */
  75
  76/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  77 * semantics.  See the comments in "open_namei" and "do_link" below.
  78 *
  79 * [10-Sep-98 Alan Modra] Another symlink change.
  80 */
  81
  82/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  83 *      inside the path - always follow.
  84 *      in the last component in creation/removal/renaming - never follow.
  85 *      if LOOKUP_FOLLOW passed - follow.
  86 *      if the pathname has trailing slashes - follow.
  87 *      otherwise - don't follow.
  88 * (applied in that order).
  89 *
  90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
  91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
  92 * During the 2.4 we need to fix the userland stuff depending on it -
  93 * hopefully we will be able to get rid of that wart in 2.5. So far only
  94 * XEmacs seems to be relying on it...
  95 */
  96
  97/* In order to reduce some races, while at the same time doing additional
  98 * checking and hopefully speeding things up, we copy filenames to the
  99 * kernel data space before using them..
 100 *
 101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 102 * PATH_MAX includes the nul terminator --RR.
 103 */
 104static inline int do_getname(const char *filename, char *page)
 105{
 106        int retval;
 107        unsigned long len = PATH_MAX;
 108
 109        if ((unsigned long) filename >= TASK_SIZE) {
 110                if (!segment_eq(get_fs(), KERNEL_DS))
 111                        return -EFAULT;
 112        } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
 113                len = TASK_SIZE - (unsigned long) filename;
 114
 115        retval = strncpy_from_user((char *)page, filename, len);
 116        if (retval > 0) {
 117                if (retval < len)
 118                        return 0;
 119                return -ENAMETOOLONG;
 120        } else if (!retval)
 121                retval = -ENOENT;
 122        return retval;
 123}
 124
 125char * getname(const char * filename)
 126{
 127        char *tmp, *result;
 128
 129        result = ERR_PTR(-ENOMEM);
 130        tmp = __getname();
 131        if (tmp)  {
 132                int retval = do_getname(filename, tmp);
 133
 134                result = tmp;
 135                if (retval < 0) {
 136                        putname(tmp);
 137                        result = ERR_PTR(retval);
 138                }
 139        }
 140        return result;
 141}
 142
 143/*
 144 *      vfs_permission()
 145 *
 146 * is used to check for read/write/execute permissions on a file.
 147 * We use "fsuid" for this, letting us set arbitrary permissions
 148 * for filesystem access without changing the "normal" uids which
 149 * are used for other things..
 150 */
 151int vfs_permission(struct inode * inode, int mask)
 152{
 153        umode_t                 mode = inode->i_mode;
 154
 155        if (mask & MAY_WRITE) {
 156                /*
 157                 * Nobody gets write access to a read-only fs.
 158                 */
 159                if (IS_RDONLY(inode) &&
 160                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 161                        return -EROFS;
 162
 163                /*
 164                 * Nobody gets write access to an immutable file.
 165                 */
 166                if (IS_IMMUTABLE(inode))
 167                        return -EACCES;
 168        }
 169
 170        if (current->fsuid == inode->i_uid)
 171                mode >>= 6;
 172        else if (in_group_p(inode->i_gid))
 173                mode >>= 3;
 174
 175        /*
 176         * If the DACs are ok we don't need any capability check.
 177         */
 178        if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
 179                return 0;
 180
 181        /*
 182         * Read/write DACs are always overridable.
 183         * Executable DACs are overridable if at least one exec bit is set.
 184         */
 185        if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
 186                if (capable(CAP_DAC_OVERRIDE))
 187                        return 0;
 188
 189        /*
 190         * Searching includes executable on directories, else just read.
 191         */
 192        if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 193                if (capable(CAP_DAC_READ_SEARCH))
 194                        return 0;
 195
 196        return -EACCES;
 197}
 198
 199int permission(struct inode * inode,int mask)
 200{
 201        if (inode->i_op && inode->i_op->permission) {
 202                int retval;
 203                lock_kernel();
 204                retval = inode->i_op->permission(inode, mask);
 205                unlock_kernel();
 206                return retval;
 207        }
 208        return vfs_permission(inode, mask);
 209}
 210
 211/*
 212 * get_write_access() gets write permission for a file.
 213 * put_write_access() releases this write permission.
 214 * This is used for regular files.
 215 * We cannot support write (and maybe mmap read-write shared) accesses and
 216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 217 * can have the following values:
 218 * 0: no writers, no VM_DENYWRITE mappings
 219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 220 * > 0: (i_writecount) users are writing to the file.
 221 *
 222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
 223 * except for the cases where we don't hold i_writecount yet. Then we need to
 224 * use {get,deny}_write_access() - these functions check the sign and refuse
 225 * to do the change if sign is wrong. Exclusion between them is provided by
 226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
 227 * who will try to move it in struct inode - just leave it here.
 228 */
 229static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
 230int get_write_access(struct inode * inode)
 231{
 232        spin_lock(&arbitration_lock);
 233        if (atomic_read(&inode->i_writecount) < 0) {
 234                spin_unlock(&arbitration_lock);
 235                return -ETXTBSY;
 236        }
 237        atomic_inc(&inode->i_writecount);
 238        spin_unlock(&arbitration_lock);
 239        return 0;
 240}
 241int deny_write_access(struct file * file)
 242{
 243        spin_lock(&arbitration_lock);
 244        if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
 245                spin_unlock(&arbitration_lock);
 246                return -ETXTBSY;
 247        }
 248        atomic_dec(&file->f_dentry->d_inode->i_writecount);
 249        spin_unlock(&arbitration_lock);
 250        return 0;
 251}
 252
 253void path_release(struct nameidata *nd)
 254{
 255        dput(nd->dentry);
 256        mntput(nd->mnt);
 257}
 258
 259/*
 260 * Internal lookup() using the new generic dcache.
 261 * SMP-safe
 262 */
 263static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 264{
 265        struct dentry * dentry = d_lookup(parent, name);
 266
 267        if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 268                if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 269                        dput(dentry);
 270                        dentry = NULL;
 271                }
 272        }
 273        return dentry;
 274}
 275
 276/*
 277 * This is called when everything else fails, and we actually have
 278 * to go to the low-level filesystem to find out what we should do..
 279 *
 280 * We get the directory semaphore, and after getting that we also
 281 * make sure that nobody added the entry to the dcache in the meantime..
 282 * SMP-safe
 283 */
 284static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 285{
 286        struct dentry * result;
 287        struct inode *dir = parent->d_inode;
 288
 289        down(&dir->i_sem);
 290        /*
 291         * First re-do the cached lookup just in case it was created
 292         * while we waited for the directory semaphore..
 293         *
 294         * FIXME! This could use version numbering or similar to
 295         * avoid unnecessary cache lookups.
 296         */
 297        result = d_lookup(parent, name);
 298        if (!result) {
 299                struct dentry * dentry = d_alloc(parent, name);
 300                result = ERR_PTR(-ENOMEM);
 301                if (dentry) {
 302                        lock_kernel();
 303                        result = dir->i_op->lookup(dir, dentry);
 304                        unlock_kernel();
 305                        if (result)
 306                                dput(dentry);
 307                        else
 308                                result = dentry;
 309                }
 310                up(&dir->i_sem);
 311                return result;
 312        }
 313
 314        /*
 315         * Uhhuh! Nasty case: the cache was re-populated while
 316         * we waited on the semaphore. Need to revalidate.
 317         */
 318        up(&dir->i_sem);
 319        if (result->d_op && result->d_op->d_revalidate) {
 320                if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
 321                        dput(result);
 322                        result = ERR_PTR(-ENOENT);
 323                }
 324        }
 325        return result;
 326}
 327
 328/*
 329 * This limits recursive symlink follows to 5, while
 330 * limiting consecutive symlinks to 40.
 331 *
 332 * Without that kind of total limit, nasty chains of consecutive
 333 * symlinks can cause almost arbitrarily long lookups. 
 334 */
 335static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 336{
 337        int err;
 338        if (current->link_count >= 5)
 339                goto loop;
 340        if (current->total_link_count >= 40)
 341                goto loop;
 342        if (current->need_resched) {
 343                current->state = TASK_RUNNING;
 344                schedule();
 345        }
 346        current->link_count++;
 347        current->total_link_count++;
 348        UPDATE_ATIME(dentry->d_inode);
 349        err = dentry->d_inode->i_op->follow_link(dentry, nd);
 350        current->link_count--;
 351        return err;
 352loop:
 353        path_release(nd);
 354        return -ELOOP;
 355}
 356
 357static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
 358{
 359        struct vfsmount *parent;
 360        struct dentry *dentry;
 361        spin_lock(&dcache_lock);
 362        parent=(*mnt)->mnt_parent;
 363        if (parent == *mnt) {
 364                spin_unlock(&dcache_lock);
 365                return 0;
 366        }
 367        mntget(parent);
 368        dentry=dget((*mnt)->mnt_mountpoint);
 369        spin_unlock(&dcache_lock);
 370        dput(*base);
 371        *base = dentry;
 372        mntput(*mnt);
 373        *mnt = parent;
 374        return 1;
 375}
 376
 377int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 378{
 379        return __follow_up(mnt, dentry);
 380}
 381
 382static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 383{
 384        struct vfsmount *mounted;
 385
 386        spin_lock(&dcache_lock);
 387        mounted = lookup_mnt(*mnt, *dentry);
 388        if (mounted) {
 389                *mnt = mntget(mounted);
 390                spin_unlock(&dcache_lock);
 391                dput(*dentry);
 392                mntput(mounted->mnt_parent);
 393                *dentry = dget(mounted->mnt_root);
 394                return 1;
 395        }
 396        spin_unlock(&dcache_lock);
 397        return 0;
 398}
 399
 400int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 401{
 402        return __follow_down(mnt,dentry);
 403}
 404 
 405static inline void follow_dotdot(struct nameidata *nd)
 406{
 407        while(1) {
 408                struct vfsmount *parent;
 409                struct dentry *dentry;
 410                read_lock(&current->fs->lock);
 411                if (nd->dentry == current->fs->root &&
 412                    nd->mnt == current->fs->rootmnt)  {
 413                        read_unlock(&current->fs->lock);
 414                        break;
 415                }
 416                read_unlock(&current->fs->lock);
 417                spin_lock(&dcache_lock);
 418                if (nd->dentry != nd->mnt->mnt_root) {
 419                        dentry = dget(nd->dentry->d_parent);
 420                        spin_unlock(&dcache_lock);
 421                        dput(nd->dentry);
 422                        nd->dentry = dentry;
 423                        break;
 424                }
 425                parent=nd->mnt->mnt_parent;
 426                if (parent == nd->mnt) {
 427                        spin_unlock(&dcache_lock);
 428                        break;
 429                }
 430                mntget(parent);
 431                dentry=dget(nd->mnt->mnt_mountpoint);
 432                spin_unlock(&dcache_lock);
 433                dput(nd->dentry);
 434                nd->dentry = dentry;
 435                mntput(nd->mnt);
 436                nd->mnt = parent;
 437        }
 438        while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
 439                ;
 440}
 441
 442/*
 443 * Name resolution.
 444 *
 445 * This is the basic name resolution function, turning a pathname
 446 * into the final dentry.
 447 *
 448 * We expect 'base' to be positive and a directory.
     *
     * Walks 'name' one component at a time, starting from nd->dentry /
     * nd->mnt and updating both as it goes.  Returns 0 on success.  On
     * failure a negative errno is returned; every failure path except
     * the one through 'return_err' calls path_release(nd) first.
     *
     * With LOOKUP_PARENT set in nd->flags the walk stops before the last
     * component and records it in nd->last / nd->last_type instead.
 449 */
 450int fastcall link_path_walk(const char * name, struct nameidata *nd)
 451{
 452        struct dentry *dentry;
 453        struct inode *inode;
 454        int err;
 455        unsigned int lookup_flags = nd->flags;
 456
            /* Skip leading slashes; nothing left means "stay at nd->dentry". */
 457        while (*name=='/')
 458                name++;
 459        if (!*name)
 460                goto return_reval;
 461
 462        inode = nd->dentry->d_inode;
            /* A non-zero link_count replaces the caller's flags with LOOKUP_FOLLOW. */
 463        if (current->link_count)
 464                lookup_flags = LOOKUP_FOLLOW;
 465
 466        /* At this point we know we have a real path component. */
 467        for(;;) {
 468                unsigned long hash;
 469                struct qstr this;
 470                unsigned int c;
 471
                    /* The current directory must be searchable. */
 472                err = permission(inode, MAY_EXEC);
 473                dentry = ERR_PTR(err);
 474                if (err)
 475                        break;
 476
                    /* Hash the component up to the next '/' or the end of the string. */
 477                this.name = name;
 478                c = *(const unsigned char *)name;
 479
 480                hash = init_name_hash();
 481                do {
 482                        name++;
 483                        hash = partial_name_hash(c, hash);
 484                        c = *(const unsigned char *)name;
 485                } while (c && (c != '/'));
 486                this.len = name - (const char *) this.name;
 487                this.hash = end_name_hash(hash);
 488
 489                /* remove trailing slashes? */
 490                if (!c)
 491                        goto last_component;
 492                while (*++name == '/');
 493                if (!*name)
 494                        goto last_with_slashes;
 495
 496                /*
 497                 * "." and ".." are special - ".." especially so because it has
 498                 * to be able to know about the current root directory and
 499                 * parent relationships.
 500                 */
 501                if (this.name[0] == '.') switch (this.len) {
 502                        default:
 503                                break;
 504                        case 2: 
 505                                if (this.name[1] != '.')
 506                                        break;
 507                                follow_dotdot(nd);
 508                                inode = nd->dentry->d_inode;
 509                                /* fallthrough */
 510                        case 1:
 511                                continue;
 512                }
 513                /*
 514                 * See if the low-level filesystem might want
 515                 * to use its own hash..
 516                 */
 517                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 518                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 519                        if (err < 0)
 520                                break;
 521                }
 522                /* This does the actual lookups.. */
 523                dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 524                if (!dentry) {
 525                        dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 526                        err = PTR_ERR(dentry);
 527                        if (IS_ERR(dentry))
 528                                break;
 529                }
 530                /* Check mountpoints.. */
 531                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 532                        ;
 533
                    /* An intermediate component must exist and have inode operations. */
 534                err = -ENOENT;
 535                inode = dentry->d_inode;
 536                if (!inode)
 537                        goto out_dput;
 538                err = -ENOTDIR; 
 539                if (!inode->i_op)
 540                        goto out_dput;
 541
 542                if (inode->i_op->follow_link) {
                            /* Hold the current mount across the follow_link call. */
 543                        struct vfsmount *mnt = mntget(nd->mnt);
 544                        err = do_follow_link(dentry, nd);
 545                        dput(dentry);
 546                        mntput(mnt);
 547                        if (err)
 548                                goto return_err;
 549                        err = -ENOENT;
 550                        inode = nd->dentry->d_inode;
 551                        if (!inode)
 552                                break;
 553                        err = -ENOTDIR; 
 554                        if (!inode->i_op)
 555                                break;
 556                } else {
                            /* Not a symlink: step nd onto the dentry just found. */
 557                        dput(nd->dentry);
 558                        nd->dentry = dentry;
 559                }
                    /* More components follow, so this one must support ->lookup. */
 560                err = -ENOTDIR; 
 561                if (!inode->i_op->lookup)
 562                        break;
 563                continue;
 564                /* here ends the main loop */
 565
                    /* Trailing slashes: follow a final symlink and require a directory. */
 566last_with_slashes:
 567                lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 568last_component:
 569                if (lookup_flags & LOOKUP_PARENT)
 570                        goto lookup_parent;
 571                if (this.name[0] == '.') switch (this.len) {
 572                        default:
 573                                break;
 574                        case 2: 
 575                                if (this.name[1] != '.')
 576                                        break;
 577                                follow_dotdot(nd);
 578                                inode = nd->dentry->d_inode;
 579                                /* fallthrough */
 580                        case 1:
 581                                goto return_reval;
 582                }
 583                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 584                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 585                        if (err < 0)
 586                                break;
 587                }
                    /* The final lookup passes the caller's flags, not LOOKUP_CONTINUE. */
 588                dentry = cached_lookup(nd->dentry, &this, nd->flags);
 589                if (!dentry) {
 590                        dentry = real_lookup(nd->dentry, &this, nd->flags);
 591                        err = PTR_ERR(dentry);
 592                        if (IS_ERR(dentry))
 593                                break;
 594                }
 595                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 596                        ;
 597                inode = dentry->d_inode;
                    /* A final symlink is followed only when LOOKUP_FOLLOW is set. */
 598                if ((lookup_flags & LOOKUP_FOLLOW)
 599                    && inode && inode->i_op && inode->i_op->follow_link) {
 600                        struct vfsmount *mnt = mntget(nd->mnt);
 601                        err = do_follow_link(dentry, nd);
 602                        dput(dentry);
 603                        mntput(mnt);
 604                        if (err)
 605                                goto return_err;
 606                        inode = nd->dentry->d_inode;
 607                } else {
 608                        dput(nd->dentry);
 609                        nd->dentry = dentry;
 610                }
 611                err = -ENOENT;
 612                if (!inode)
 613                        goto no_inode;
 614                if (lookup_flags & LOOKUP_DIRECTORY) {
 615                        err = -ENOTDIR; 
 616                        if (!inode->i_op || !inode->i_op->lookup)
 617                                break;
 618                }
 619                goto return_base;
                    /* No inode: an error only if LOOKUP_POSITIVE or LOOKUP_DIRECTORY is set. */
 620no_inode:
 621                err = -ENOENT;
 622                if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
 623                        break;
 624                goto return_base;
                    /* LOOKUP_PARENT: record the last component and classify it. */
 625lookup_parent:
 626                nd->last = this;
 627                nd->last_type = LAST_NORM;
 628                if (this.name[0] != '.')
 629                        goto return_base;
 630                if (this.len == 1)
 631                        nd->last_type = LAST_DOT;
 632                else if (this.len == 2 && this.name[1] == '.')
 633                        nd->last_type = LAST_DOTDOT;
 634                else
 635                        goto return_base;
                    /* "." and ".." fall through to the revalidation below. */
 636return_reval:
 637                /*
 638                 * We bypassed the ordinary revalidation routines.
 639                 * Check the cached dentry for staleness.
 640                 */
 641                dentry = nd->dentry;
 642                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 643                        err = -ESTALE;
 644                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
 645                                d_invalidate(dentry);
 646                                break;
 647                        }
 648                }
 649return_base:
 650                return 0;
                    /* Drop the dentry just looked up, then take the common error exit. */
 651out_dput:
 652                dput(dentry);
 653                break;
 654        }
            /* Common error exit: release the references held in nd. */
 655        path_release(nd);
            /*
             * Reached directly only after do_follow_link() failed.  Its -ELOOP
             * path has already called path_release(nd).
             * NOTE(review): presumably ->follow_link also releases nd when it
             * fails -- confirm.
             */
 656return_err:
 657        return err;
 658}
 659
 660int fastcall path_walk(const char * name, struct nameidata *nd)
 661{
 662        current->total_link_count = 0;
 663        return link_path_walk(name, nd);
 664}
 665
 666/* SMP-safe */
 667/* returns 1 if everything is done */
 668static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 669{
 670        if (path_walk(name, nd))
 671                return 0;               /* something went wrong... */
 672
 673        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
 674                struct nameidata nd_root;
 675                /*
 676                 * NAME was not found in alternate root or it's a directory.  Try to find
 677                 * it in the normal root:
 678                 */
 679                nd_root.last_type = LAST_ROOT;
 680                nd_root.flags = nd->flags;
 681                read_lock(&current->fs->lock);
 682                nd_root.mnt = mntget(current->fs->rootmnt);
 683                nd_root.dentry = dget(current->fs->root);
 684                read_unlock(&current->fs->lock);
 685                if (path_walk(name, &nd_root))
 686                        return 1;
 687                if (nd_root.dentry->d_inode) {
 688                        path_release(nd);
 689                        nd->dentry = nd_root.dentry;
 690                        nd->mnt = nd_root.mnt;
 691                        nd->last = nd_root.last;
 692                        return 1;
 693                }
 694                path_release(&nd_root);
 695        }
 696        return 1;
 697}
 698
 699void set_fs_altroot(void)
 700{
 701        char *emul = __emul_prefix();
 702        struct nameidata nd;
 703        struct vfsmount *mnt = NULL, *oldmnt;
 704        struct dentry *dentry = NULL, *olddentry;
 705        if (emul) {
 706                read_lock(&current->fs->lock);
 707                nd.mnt = mntget(current->fs->rootmnt);
 708                nd.dentry = dget(current->fs->root);
 709                read_unlock(&current->fs->lock);
 710                nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
 711                if (path_walk(emul,&nd) == 0) {
 712                        mnt = nd.mnt;
 713                        dentry = nd.dentry;
 714                }
 715        }
 716        write_lock(&current->fs->lock);
 717        oldmnt = current->fs->altrootmnt;
 718        olddentry = current->fs->altroot;
 719        current->fs->altrootmnt = mnt;
 720        current->fs->altroot = dentry;
 721        write_unlock(&current->fs->lock);
 722        if (olddentry) {
 723                dput(olddentry);
 724                mntput(oldmnt);
 725        }
 726}
 727
 728/* SMP-safe */
 729static inline int
 730walk_init_root(const char *name, struct nameidata *nd)
 731{
 732        read_lock(&current->fs->lock);
 733        if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 734                nd->mnt = mntget(current->fs->altrootmnt);
 735                nd->dentry = dget(current->fs->altroot);
 736                read_unlock(&current->fs->lock);
 737                if (__emul_lookup_dentry(name,nd))
 738                        return 0;
 739                read_lock(&current->fs->lock);
 740        }
 741        nd->mnt = mntget(current->fs->rootmnt);
 742        nd->dentry = dget(current->fs->root);
 743        read_unlock(&current->fs->lock);
 744        return 1;
 745}
 746
 747/* SMP-safe */
 748int fastcall path_lookup(const char *path, unsigned flags, struct nameidata *nd)
 749{
 750        int error = 0;
 751        if (path_init(path, flags, nd))
 752                error = path_walk(path, nd);
 753        return error;
 754}
 755
 756
 757/* SMP-safe */
 758int fastcall path_init(const char *name, unsigned int flags, struct nameidata *nd)
 759{
 760        nd->last_type = LAST_ROOT; /* if there are only slashes... */
 761        nd->flags = flags;
 762        if (*name=='/')
 763                return walk_init_root(name,nd);
 764        read_lock(&current->fs->lock);
 765        nd->mnt = mntget(current->fs->pwdmnt);
 766        nd->dentry = dget(current->fs->pwd);
 767        read_unlock(&current->fs->lock);
 768        return 1;
 769}
 770
 771/*
 772 * Restricted form of lookup. Doesn't follow links, single-component only,
 773 * needs parent already locked. Doesn't follow mounts.
 774 * SMP-safe.
 775 */
 776struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 777{
 778        struct dentry * dentry;
 779        struct inode *inode;
 780        int err;
 781
 782        inode = base->d_inode;
 783        err = permission(inode, MAY_EXEC);
 784        dentry = ERR_PTR(err);
 785        if (err)
 786                goto out;
 787
 788        /*
 789         * See if the low-level filesystem might want
 790         * to use its own hash..
 791         */
 792        if (base->d_op && base->d_op->d_hash) {
 793                err = base->d_op->d_hash(base, name);
 794                dentry = ERR_PTR(err);
 795                if (err < 0)
 796                        goto out;
 797        }
 798
 799        dentry = cached_lookup(base, name, 0);
 800        if (!dentry) {
 801                struct dentry *new = d_alloc(base, name);
 802                dentry = ERR_PTR(-ENOMEM);
 803                if (!new)
 804                        goto out;
 805                lock_kernel();
 806                dentry = inode->i_op->lookup(inode, new);
 807                unlock_kernel();
 808                if (!dentry)
 809                        dentry = new;
 810                else
 811                        dput(new);
 812        }
 813out:
 814        return dentry;
 815}
 816
 817/* SMP-safe */
 818struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
 819{
 820        unsigned long hash;
 821        struct qstr this;
 822        unsigned int c;
 823
 824        this.name = name;
 825        this.len = len;
 826        if (!len)
 827                goto access;
 828
 829        hash = init_name_hash();
 830        while (len--) {
 831                c = *(const unsigned char *)name++;
 832                if (c == '/' || c == '\0')
 833                        goto access;
 834                hash = partial_name_hash(c, hash);
 835        }
 836        this.hash = end_name_hash(hash);
 837
 838        return lookup_hash(&this, base);
 839access:
 840        return ERR_PTR(-EACCES);
 841}
 842
 843/*
 844 *      namei()
 845 *
 846 * is used by most simple commands to get the inode of a specified name.
 847 * Open, link etc use their own routines, but this is enough for things
 848 * like 'chmod' etc.
 849 *
 850 * namei exists in two versions: namei/lnamei. The only difference is
 851 * that namei follows links, while lnamei does not.
 852 * SMP-safe
 853 */
 854int fastcall __user_walk(const char *name, unsigned flags, struct nameidata *nd)
 855{
 856        char *tmp;
 857        int err;
 858
 859        tmp = getname(name);
 860        err = PTR_ERR(tmp);
 861        if (!IS_ERR(tmp)) {
 862                err = 0;
 863                err = path_lookup(tmp, flags, nd);
 864                putname(tmp);
 865        }
 866        return err;
 867}
 868
 869/*
 870 * It's inline, so penalty for filesystems that don't use sticky bit is
 871 * minimal.
 872 */
 873static inline int check_sticky(struct inode *dir, struct inode *inode)
 874{
 875        if (!(dir->i_mode & S_ISVTX))
 876                return 0;
 877        if (inode->i_uid == current->fsuid)
 878                return 0;
 879        if (dir->i_uid == current->fsuid)
 880                return 0;
 881        return !capable(CAP_FOWNER);
 882}
 883
 884/*
 885 *      Check whether we can remove a link victim from directory dir, check
 886 *  whether the type of victim is right.
 887 *  1. We can't do it if dir is read-only (done in permission())
 888 *  2. We should have write and exec permissions on dir
 889 *  3. We can't remove anything from append-only dir
 890 *  4. We can't do anything with immutable dir (done in permission())
 891 *  5. If the sticky bit on dir is set we should either
 892 *      a. be owner of dir, or
 893 *      b. be owner of victim, or
 894 *      c. have CAP_FOWNER capability
 895 *  6. If the victim is append-only or immutable we can't do antyhing with
 896 *     links pointing to it.
 897 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 898 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 899 *  9. We can't remove a root or mountpoint.
 900 */
 901static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 902{
 903        int error;
 904        if (!victim->d_inode || victim->d_parent->d_inode != dir)
 905                return -ENOENT;
 906        error = permission(dir,MAY_WRITE | MAY_EXEC);
 907        if (error)
 908                return error;
 909        if (IS_APPEND(dir))
 910                return -EPERM;
 911        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 912            IS_IMMUTABLE(victim->d_inode))
 913                return -EPERM;
 914        if (isdir) {
 915                if (!S_ISDIR(victim->d_inode->i_mode))
 916                        return -ENOTDIR;
 917                if (IS_ROOT(victim))
 918                        return -EBUSY;
 919        } else if (S_ISDIR(victim->d_inode->i_mode))
 920                return -EISDIR;
 921        if (IS_DEADDIR(dir))
 922                return -ENOENT;
 923        return 0;
 924}
 925
 926/*      Check whether we can create an object with dentry child in directory
 927 *  dir.
 928 *  1. We can't do it if child already exists (open has special treatment for
 929 *     this case, but since we are inlined it's OK)
 930 *  2. We can't do it if dir is read-only (done in permission())
 931 *  3. We should have write and exec permissions on dir
 932 *  4. We can't do it if dir is immutable (done in permission())
 933 */
 934static inline int may_create(struct inode *dir, struct dentry *child) {
 935        if (child->d_inode)
 936                return -EEXIST;
 937        if (IS_DEADDIR(dir))
 938                return -ENOENT;
 939        return permission(dir,MAY_WRITE | MAY_EXEC);
 940}
 941
 942/* 
 943 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 944 * reasons.
 945 *
 946 * O_DIRECTORY translates into forcing a directory lookup.
 947 */
 948static inline int lookup_flags(unsigned int f)
 949{
 950        unsigned long retval = LOOKUP_FOLLOW;
 951
 952        if (f & O_NOFOLLOW)
 953                retval &= ~LOOKUP_FOLLOW;
 954        
 955        if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 956                retval &= ~LOOKUP_FOLLOW;
 957        
 958        if (f & O_DIRECTORY)
 959                retval |= LOOKUP_DIRECTORY;
 960
 961        return retval;
 962}
 963
 964int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 965{
 966        int error;
 967
 968        mode &= S_IALLUGO;
 969        mode |= S_IFREG;
 970
 971        down(&dir->i_zombie);
 972        error = may_create(dir, dentry);
 973        if (error)
 974                goto exit_lock;
 975
 976        error = -EACCES;        /* shouldn't it be ENOSYS? */
 977        if (!dir->i_op || !dir->i_op->create)
 978                goto exit_lock;
 979
 980        DQUOT_INIT(dir);
 981        lock_kernel();
 982        error = dir->i_op->create(dir, dentry, mode);
 983        unlock_kernel();
 984exit_lock:
 985        up(&dir->i_zombie);
 986        if (!error)
 987                inode_dir_notify(dir, DN_CREATE);
 988        return error;
 989}
 990
/*
 *      open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *                        01 - read permission needed
 *                        10 - write permission needed
 *                        11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 *
 * Overview of the control flow:
 *   - Without O_CREAT this is a plain path_lookup() followed by the
 *     checks at "ok".
 *   - With O_CREAT the parent is looked up (LOOKUP_PARENT), the last
 *     component is looked up under the parent's ->i_sem, and "do_last"
 *     either creates the file or falls through to the checks for an
 *     existing object.
 *   - If the last component turns out to be a symlink, "do_link" follows
 *     it by hand and loops back to "do_last"; at most 32 such rounds are
 *     allowed before -ELOOP is returned.
 *
 * Returns 0 on success with the opened object in nd->dentry.  On the
 * error paths that go through "exit", nd is released via path_release().
 * The paths that return directly after a failed path_lookup() or
 * ->follow_link() do not call path_release() here - presumably those
 * callees have already released nd; confirm against their definitions.
 * SMP-safe
 */
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
        int acc_mode, error = 0;
        struct inode *inode;
        struct dentry *dentry;
        struct vfsmount *mnt;
        struct dentry *dir;
        int count = 0;          /* number of trailing symlinks followed so far */

        /* Permission mask (MAY_* bits) derived from the access-mode bits. */
        acc_mode = ACC_MODE(flag);

        /*
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                error = path_lookup(pathname, lookup_flags(flag), nd);
                if (error)
                        return error;
                dentry = nd->dentry;
                goto ok;
        }

        /*
         * Create - we need to know the parent.
         */
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;

        /*
         * We have the parent and last component. First of all, check
         * that we are not asked to creat(2) an obvious directory - that
         * will not do.
         */
        error = -EISDIR;
        /* A non-NUL byte right after the last component means trailing slashes. */
        if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
                goto exit;

        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
        dentry = lookup_hash(&nd->last, nd->dentry);

        /*
         * Entered with dir->d_inode->i_sem held and dentry holding the
         * result of lookup_hash() (possibly an ERR_PTR).
         */
do_last:
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry)) {
                up(&dir->d_inode->i_sem);
                goto exit;
        }

        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                error = vfs_create(dir->d_inode, dentry,
                                   mode & ~current->fs->umask);
                up(&dir->d_inode->i_sem);
                /* nd now refers to the new dentry instead of its parent. */
                dput(nd->dentry);
                nd->dentry = dentry;
                if (error)
                        goto exit;
                /* Don't check for write permission, don't truncate */
                acc_mode = 0;
                flag &= ~O_TRUNC;
                goto ok;
        }

        /*
         * It already exists.
         */
        up(&dir->d_inode->i_sem);

        error = -EEXIST;
        if (flag & O_EXCL)
                goto exit_dput;

        if (d_mountpoint(dentry)) {
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
                /* Descend through everything mounted on top of this dentry. */
                while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
        }
        error = -ENOENT;
        if (!dentry->d_inode)
                goto exit_dput;
        /* An inode with a ->follow_link() method is treated as a symlink. */
        if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
                goto do_link;

        /* Replace the parent in nd by the object itself. */
        dput(nd->dentry);
        nd->dentry = dentry;
        error = -EISDIR;
        if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
                goto exit;
        /*
         * Common checks on the final object.  From here on nd->dentry is
         * the object being opened, so the error path is plain "exit".
         */
ok:
        error = -ENOENT;
        inode = dentry->d_inode;
        if (!inode)
                goto exit;

        error = -ELOOP;
        if (S_ISLNK(inode->i_mode))
                goto exit;
        
        error = -EISDIR;
        if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
                goto exit;

        error = permission(inode,acc_mode);
        if (error)
                goto exit;

        /*
         * FIFO's, sockets and device files are special: they don't
         * actually live on the filesystem itself, and as such you
         * can write to them even if the filesystem is read-only.
         */
        if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
                flag &= ~O_TRUNC;
        } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
                /* Device nodes are refused on mounts flagged MNT_NODEV. */
                error = -EACCES;
                if (nd->mnt->mnt_flags & MNT_NODEV)
                        goto exit;

                flag &= ~O_TRUNC;
        } else {
                error = -EROFS;
                /* NOTE(review): the literal 2 looks like the write bit (FMODE_WRITE) - confirm. */
                if (IS_RDONLY(inode) && (flag & 2))
                        goto exit;
        }
        /*
         * An append-only file must be opened in append mode for writing.
         */
        error = -EPERM;
        if (IS_APPEND(inode)) {
                if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
                        goto exit;
                if (flag & O_TRUNC)
                        goto exit;
        }

        /*
         * Ensure there are no outstanding leases on the file.
         */
        error = get_lease(inode, flag);
        if (error)
                goto exit;

        if (flag & O_TRUNC) {
                /* Write access is held only for the duration of the truncate. */
                error = get_write_access(inode);
                if (error)
                        goto exit;

                /*
                 * Refuse to truncate files with mandatory locks held on them.
                 */
                error = locks_verify_locked(inode);
                if (!error) {
                        DQUOT_INIT(inode);
                        
                        error = do_truncate(dentry, 0);
                }
                put_write_access(inode);
                if (error)
                        goto exit;
        } else
                if (flag & FMODE_WRITE)
                        DQUOT_INIT(inode);

        return 0;

        /* dentry is still a separate reference that nd does not own. */
exit_dput:
        dput(dentry);
exit:
        path_release(nd);
        return error;

do_link:
        error = -ELOOP;
        if (flag & O_NOFOLLOW)
                goto exit_dput;
        /*
         * This is subtle. Instead of calling do_follow_link() we do the
         * thing by hands. The reason is that this way we have zero link_count
         * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
         * After that we have the parent and last component, i.e.
         * we are in the same situation as after the first path_walk().
         * Well, almost - if the last component is normal we get its copy
         * stored in nd->last.name and we will have to putname() it when we
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
        /* Pin the current mount across ->follow_link(). */
        mnt = mntget(nd->mnt);
        error = dentry->d_inode->i_op->follow_link(dentry, nd);
        dput(dentry);
        mntput(mnt);
        if (error)
                return error;
        if (nd->last_type == LAST_BIND) {
                dentry = nd->dentry;
                goto ok;
        }
        error = -EISDIR;
        if (nd->last_type != LAST_NORM)
                goto exit;
        /* Link target ends in a slash: release the name copy and fail with -EISDIR. */
        if (nd->last.name[nd->last.len]) {
                putname(nd->last.name);
                goto exit;
        }
        error = -ELOOP;
        /* Bound the number of trailing symlinks followed by hand. */
        if (count++==32) {
                putname(nd->last.name);
                goto exit;
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
        dentry = lookup_hash(&nd->last, nd->dentry);
        putname(nd->last.name);
        goto do_last;
}
1221
1222/* SMP-safe */
1223static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1224{
1225        struct dentry *dentry;
1226
1227        down(&nd->dentry->d_inode->i_sem);
1228        dentry = ERR_PTR(-EEXIST);
1229        if (nd->last_type != LAST_NORM)
1230                goto fail;
1231        dentry = lookup_hash(&nd->last, nd->dentry);
1232        if (IS_ERR(dentry))
1233                goto fail;
1234        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1235                goto enoent;
1236        return dentry;
1237enoent:
1238        dput(dentry);
1239        dentry = ERR_PTR(-ENOENT);
1240fail:
1241        return dentry;
1242}
1243
1244int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1245{
1246        int error = -EPERM;
1247
1248        down(&dir->i_zombie);
1249        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1250                goto exit_lock;
1251
1252        error = may_create(dir, dentry);
1253        if (error)
1254                goto exit_lock;
1255
1256        error = -EPERM;
1257        if (!dir->i_op || !dir->i_op->mknod)
1258                goto exit_lock;
1259
1260        DQUOT_INIT(dir);
1261        lock_kernel();
1262        error = dir->i_op->mknod(dir, dentry, mode, dev);
1263        unlock_kernel();
1264exit_lock:
1265        up(&dir->i_zombie);
1266        if (!error)
1267                inode_dir_notify(dir, DN_CREATE);
1268        return error;
1269}
1270
1271asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1272{
1273        int error = 0;
1274        char * tmp;
1275        struct dentry * dentry;
1276        struct nameidata nd;
1277
1278        if (S_ISDIR(mode))
1279                return -EPERM;
1280        tmp = getname(filename);
1281        if (IS_ERR(tmp))
1282                return PTR_ERR(tmp);
1283
1284        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1285        if (error)
1286                goto out;
1287        dentry = lookup_create(&nd, 0);
1288        error = PTR_ERR(dentry);
1289
1290        mode &= ~current->fs->umask;
1291        if (!IS_ERR(dentry)) {
1292                switch (mode & S_IFMT) {
1293                case 0: case S_IFREG:
1294                        error = vfs_create(nd.dentry->d_inode,dentry,mode);
1295                        break;
1296                case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1297                        error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1298                        break;
1299                case S_IFDIR:
1300                        error = -EPERM;
1301                        break;
1302                default:
1303                        error = -EINVAL;
1304                }
1305                dput(dentry);
1306        }
1307        up(&nd.dentry->d_inode->i_sem);
1308        path_release(&nd);
1309out:
1310        putname(tmp);
1311
1312        return error;
1313}
1314
1315int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1316{
1317        int error;
1318
1319        down(&dir->i_zombie);
1320        error = may_create(dir, dentry);
1321        if (error)
1322                goto exit_lock;
1323
1324        error = -EPERM;
1325        if (!dir->i_op || !dir->i_op->mkdir)
1326                goto exit_lock;
1327
1328        DQUOT_INIT(dir);
1329        mode &= (S_IRWXUGO|S_ISVTX);
1330        lock_kernel();
1331        error = dir->i_op->mkdir(dir, dentry, mode);
1332        unlock_kernel();
1333
1334exit_lock:
1335        up(&dir->i_zombie);
1336        if (!error)
1337                inode_dir_notify(dir, DN_CREATE);
1338        return error;
1339}
1340
1341asmlinkage long sys_mkdir(const char * pathname, int mode)
1342{
1343        int error = 0;
1344        char * tmp;
1345
1346        tmp = getname(pathname);
1347        error = PTR_ERR(tmp);
1348        if (!IS_ERR(tmp)) {
1349                struct dentry *dentry;
1350                struct nameidata nd;
1351
1352                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1353                if (error)
1354                        goto out;
1355                dentry = lookup_create(&nd, 1);
1356                error = PTR_ERR(dentry);
1357                if (!IS_ERR(dentry)) {
1358                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
1359                                          mode & ~current->fs->umask);
1360                        dput(dentry);
1361                }
1362                up(&nd.dentry->d_inode->i_sem);
1363                path_release(&nd);
1364out:
1365                putname(tmp);
1366        }
1367
1368        return error;
1369}
1370
1371/*
1372 * We try to drop the dentry early: we should have
1373 * a usage count of 2 if we're the only user of this
1374 * dentry, and if that is true (possibly after pruning
1375 * the dcache), then we drop the dentry now.
1376 *
1377 * A low-level filesystem can, if it choses, legally
1378 * do a
1379 *
1380 *      if (!d_unhashed(dentry))
1381 *              return -EBUSY;
1382 *
1383 * if it cannot handle the case of removing a directory
1384 * that is still in use by something else..
1385 */
1386static void d_unhash(struct dentry *dentry)
1387{
1388        dget(dentry);
1389        spin_lock(&dcache_lock);
1390        switch (atomic_read(&dentry->d_count)) {
1391        default:
1392                spin_unlock(&dcache_lock);
1393                shrink_dcache_parent(dentry);
1394                spin_lock(&dcache_lock);
1395                if (atomic_read(&dentry->d_count) != 2)
1396                        break;
1397        case 2:
1398                list_del_init(&dentry->d_hash);
1399        }
1400        spin_unlock(&dcache_lock);
1401}
1402
1403int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1404{
1405        int error;
1406
1407        error = may_delete(dir, dentry, 1);
1408        if (error)
1409                return error;
1410
1411        if (!dir->i_op || !dir->i_op->rmdir)
1412                return -EPERM;
1413
1414        DQUOT_INIT(dir);
1415
1416        double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1417        d_unhash(dentry);
1418        if (d_mountpoint(dentry))
1419                error = -EBUSY;
1420        else {
1421                lock_kernel();
1422                error = dir->i_op->rmdir(dir, dentry);
1423                unlock_kernel();
1424                if (!error)
1425                        dentry->d_inode->i_flags |= S_DEAD;
1426        }
1427        double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1428        if (!error) {
1429                inode_dir_notify(dir, DN_DELETE);
1430                d_delete(dentry);
1431        }
1432        dput(dentry);
1433
1434        return error;
1435}
1436
1437asmlinkage long sys_rmdir(const char * pathname)
1438{
1439        int error = 0;
1440        char * name;
1441        struct dentry *dentry;
1442        struct nameidata nd;
1443
1444        name = getname(pathname);
1445        if(IS_ERR(name))
1446                return PTR_ERR(name);
1447
1448        error = path_lookup(name, LOOKUP_PARENT, &nd);
1449        if (error)
1450                goto exit;
1451
1452        switch(nd.last_type) {
1453                case LAST_DOTDOT:
1454                        error = -ENOTEMPTY;
1455                        goto exit1;
1456                case LAST_DOT:
1457                        error = -EINVAL;
1458                        goto exit1;
1459                case LAST_ROOT:
1460                        error = -EBUSY;
1461                        goto exit1;
1462        }
1463        down(&nd.dentry->d_inode->i_sem);
1464        dentry = lookup_hash(&nd.last, nd.dentry);
1465        error = PTR_ERR(dentry);
1466        if (!IS_ERR(dentry)) {
1467                error = vfs_rmdir(nd.dentry->d_inode, dentry);
1468                dput(dentry);
1469        }
1470        up(&nd.dentry->d_inode->i_sem);
1471exit1:
1472        path_release(&nd);
1473exit:
1474        putname(name);
1475        return error;
1476}
1477
1478int vfs_unlink(struct inode *dir, struct dentry *dentry)
1479{
1480        int error;
1481
1482        down(&dir->i_zombie);
1483        error = may_delete(dir, dentry, 0);
1484        if (!error) {
1485                error = -EPERM;
1486                if (dir->i_op && dir->i_op->unlink) {
1487                        DQUOT_INIT(dir);
1488                        if (d_mountpoint(dentry))
1489                                error = -EBUSY;
1490                        else {
1491                                lock_kernel();
1492                                error = dir->i_op->unlink(dir, dentry);
1493                                unlock_kernel();
1494                                if (!error)
1495                                        d_delete(dentry);
1496                        }
1497                }
1498        }
1499        up(&dir->i_zombie);
1500        if (!error)
1501                inode_dir_notify(dir, DN_DELETE);
1502        return error;
1503}
1504
1505asmlinkage long sys_unlink(const char * pathname)
1506{
1507        int error = 0;
1508        char * name;
1509        struct dentry *dentry;
1510        struct nameidata nd;
1511
1512        name = getname(pathname);
1513        if(IS_ERR(name))
1514                return PTR_ERR(name);
1515
1516        error = path_lookup(name, LOOKUP_PARENT, &nd);
1517        if (error)
1518                goto exit;
1519        error = -EISDIR;
1520        if (nd.last_type != LAST_NORM)
1521                goto exit1;
1522        down(&nd.dentry->d_inode->i_sem);
1523        dentry = lookup_hash(&nd.last, nd.dentry);
1524        error = PTR_ERR(dentry);
1525        if (!IS_ERR(dentry)) {
1526                /* Why not before? Because we want correct error value */
1527                if (nd.last.name[nd.last.len])
1528                        goto slashes;
1529                error = vfs_unlink(nd.dentry->d_inode, dentry);
1530        exit2:
1531                dput(dentry);
1532        }
1533        up(&nd.dentry->d_inode->i_sem);
1534exit1:
1535        path_release(&nd);
1536exit:
1537        putname(name);
1538
1539        return error;
1540
1541slashes:
1542        error = !dentry->d_inode ? -ENOENT :
1543                S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1544        goto exit2;
1545}
1546
1547int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1548{
1549        int error;
1550
1551        down(&dir->i_zombie);
1552        error = may_create(dir, dentry);
1553        if (error)
1554                goto exit_lock;
1555
1556        error = -EPERM;
1557        if (!dir->i_op || !dir->i_op->symlink)
1558                goto exit_lock;
1559
1560        DQUOT_INIT(dir);
1561        lock_kernel();
1562        error = dir->i_op->symlink(dir, dentry, oldname);
1563        unlock_kernel();
1564
1565exit_lock:
1566        up(&dir->i_zombie);
1567        if (!error)
1568                inode_dir_notify(dir, DN_CREATE);
1569        return error;
1570}
1571
1572asmlinkage long sys_symlink(const char * oldname, const char * newname)
1573{
1574        int error = 0;
1575        char * from;
1576        char * to;
1577
1578        from = getname(oldname);
1579        if(IS_ERR(from))
1580                return PTR_ERR(from);
1581        to = getname(newname);
1582        error = PTR_ERR(to);
1583        if (!IS_ERR(to)) {
1584                struct dentry *dentry;
1585                struct nameidata nd;
1586
1587                error = path_lookup(to, LOOKUP_PARENT, &nd);
1588                if (error)
1589                        goto out;
1590                dentry = lookup_create(&nd, 0);
1591                error = PTR_ERR(dentry);
1592                if (!IS_ERR(dentry)) {
1593                        error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1594                        dput(dentry);
1595                }
1596                up(&nd.dentry->d_inode->i_sem);
1597                path_release(&nd);
1598out:
1599                putname(to);
1600        }
1601        putname(from);
1602        return error;
1603}
1604
1605int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1606{
1607        struct inode *inode;
1608        int error;
1609
1610        down(&dir->i_zombie);
1611        error = -ENOENT;
1612        inode = old_dentry->d_inode;
1613        if (!inode)
1614                goto exit_lock;
1615
1616        error = may_create(dir, new_dentry);
1617        if (error)
1618                goto exit_lock;
1619
1620        error = -EXDEV;
1621        if (dir->i_dev != inode->i_dev)
1622                goto exit_lock;
1623
1624        /*
1625         * A link to an append-only or immutable file cannot be created.
1626         */
1627        error = -EPERM;
1628        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1629                goto exit_lock;
1630        if (!dir->i_op || !dir->i_op->link)
1631                goto exit_lock;
1632
1633        DQUOT_INIT(dir);
1634        lock_kernel();
1635        error = dir->i_op->link(old_dentry, dir, new_dentry);
1636        unlock_kernel();
1637
1638exit_lock:
1639        up(&dir->i_zombie);
1640        if (!error)
1641                inode_dir_notify(dir, DN_CREATE);
1642        return error;
1643}
1644
1645/*
1646 * Hardlinks are often used in delicate situations.  We avoid
1647 * security-related surprises by not following symlinks on the
1648 * newname.  --KAB
1649 *
1650 * We don't follow them on the oldname either to be compatible
1651 * with linux 2.0, and to avoid hard-linking to directories
1652 * and other special files.  --ADM
1653 */
1654asmlinkage long sys_link(const char * oldname, const char * newname)
1655{
1656        int error;
1657        char * to;
1658
1659        to = getname(newname);
1660        error = PTR_ERR(to);
1661        if (!IS_ERR(to)) {
1662                struct dentry *new_dentry;
1663                struct nameidata nd, old_nd;
1664
1665                error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
1666                if (error)
1667                        goto exit;
1668                error = path_lookup(to, LOOKUP_PARENT, &nd);
1669                if (error)
1670                        goto out;
1671                error = -EXDEV;
1672                if (old_nd.mnt != nd.mnt)
1673                        goto out_release;
1674                new_dentry = lookup_create(&nd, 0);
1675                error = PTR_ERR(new_dentry);
1676                if (!IS_ERR(new_dentry)) {
1677                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1678                        dput(new_dentry);
1679                }
1680                up(&nd.dentry->d_inode->i_sem);
1681out_release:
1682                path_release(&nd);
1683out:
1684                path_release(&old_nd);
1685exit:
1686                putname(to);
1687        }
1688        return error;
1689}
1690
/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *      a) we can get into loop creation. Check is done in is_subdir().
 *      b) race potential - two innocent renames can create a loop together.
 *         That's where 4.4 screws up. Current fix: serialization on
 *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
 *         story.
 *      c) we have to lock _three_ objects - parents and victim (if it exists).
 *         And that - after we got ->i_sem on parents (until then we don't know
 *         whether the target exists at all, let alone whether it is a directory
 *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
 *         on link creation/removal of any kind. And taken (without ->i_sem) on
 *         directory that will be removed (both in rmdir() and here).
 *      d) some filesystems don't support opened-but-unlinked directories,
 *         either because of layout or because they are not ready to deal with
 *         all cases correctly. The latter will be fixed (taking this sort of
 *         stuff into VFS), but the former is not going away. Solution: the same
 *         trick as in rmdir().
 *      e) conversion from fhandle to dentry may come in the wrong moment - when
 *         we are removing the target. Solution: we will have to grab ->i_zombie
 *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *         ->i_sem on parents, which works but leads to some truly excessive
 *         locking].
 *
 * Returns 0 on success (or when source and target are the same inode),
 * otherwise a negative errno:
 *   the error from may_delete()/may_create()/permission(),
 *   -EXDEV   if the two directories are on different devices,
 *   -EPERM   if old_dir has no ->rename() method,
 *   -EINVAL  if is_subdir(new_dentry, old_dentry) is true or new_dentry
 *            is the parent of old_dentry,
 *   -EBUSY   if either dentry is a mountpoint,
 *   the error from the filesystem's ->rename().
 */
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
               struct inode *new_dir, struct dentry *new_dentry)
{
        int error;
        struct inode *target;

        /* Renaming something onto itself is a successful no-op. */
        if (old_dentry->d_inode == new_dentry->d_inode)
                return 0;

        error = may_delete(old_dir, old_dentry, 1);
        if (error)
                return error;

        if (new_dir->i_dev != old_dir->i_dev)
                return -EXDEV;

        /* An existing target gets removed, a missing one gets created. */
        if (!new_dentry->d_inode)
                error = may_create(new_dir, new_dentry);
        else
                error = may_delete(new_dir, new_dentry, 1);
        if (error)
                return error;

        if (!old_dir->i_op || !old_dir->i_op->rename)
                return -EPERM;

        /*
         * If we are going to change the parent - check write permissions,
         * we'll need to flip '..'.
         */
        if (new_dir != old_dir) {
                error = permission(old_dentry->d_inode, MAY_WRITE);
        }
        /* error is still 0 here when the parent does not change. */
        if (error)
                return error;

        DQUOT_INIT(old_dir);
        DQUOT_INIT(new_dir);
        /* Serialize against other directory renames on this superblock, see b). */
        down(&old_dir->i_sb->s_vfs_rename_sem);
        error = -EINVAL;
        /* Loop-creation check, see a). */
        if (is_subdir(new_dentry, old_dentry))
                goto out_unlock;
        /* Don't eat your daddy, dear... */
        /* This also avoids locking issues */
        if (old_dentry->d_parent == new_dentry)
                goto out_unlock;
        target = new_dentry->d_inode;
        if (target) { /* Hastur! Hastur! Hastur! */
                triple_down(&old_dir->i_zombie,
                            &new_dir->i_zombie,
                            &target->i_zombie);
                /* Same trick as in rmdir(), see d); takes a reference on new_dentry. */
                d_unhash(new_dentry);
        } else
                double_down(&old_dir->i_zombie,
                            &new_dir->i_zombie);
        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                error = -EBUSY;
        else 
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (target) {
                /* The overwritten directory is gone for good. */
                if (!error)
                        target->i_flags |= S_DEAD;
                triple_up(&old_dir->i_zombie,
                          &new_dir->i_zombie,
                          &target->i_zombie);
                /* Undo the unhashing done above, then drop d_unhash()'s reference. */
                if (d_unhashed(new_dentry))
                        d_rehash(new_dentry);
                dput(new_dentry);
        } else
                double_up(&old_dir->i_zombie,
                          &new_dir->i_zombie);
                
        /* Reflect the successful rename in the dcache. */
        if (!error)
                d_move(old_dentry,new_dentry);
out_unlock:
        up(&old_dir->i_sb->s_vfs_rename_sem);
        return error;
}
1795
1796int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1797               struct inode *new_dir, struct dentry *new_dentry)
1798{
1799        int error;
1800
1801        if (old_dentry->d_inode == new_dentry->d_inode)
1802                return 0;
1803
1804        error = may_delete(old_dir, old_dentry, 0);
1805        if (error)
1806                return error;
1807
1808        if (new_dir->i_dev != old_dir->i_dev)
1809                return -EXDEV;
1810
1811        if (!new_dentry->d_inode)
1812                error = may_create(new_dir, new_dentry);
1813        else
1814                error = may_delete(new_dir, new_dentry, 0);
1815        if (error)
1816                return error;
1817
1818        if (!old_dir->i_op || !old_dir->i_op->rename)
1819                return -EPERM;
1820
1821        DQUOT_INIT(old_dir);
1822        DQUOT_INIT(new_dir);
1823        double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1824        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1825                error = -EBUSY;
1826        else
1827                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1828        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1829        if (error)
1830                return error;
1831        /* The following d_move() should become unconditional */
1832        if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1833                d_move(old_dentry, new_dentry);
1834        }
1835        return 0;
1836}
1837
1838int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1839               struct inode *new_dir, struct dentry *new_dentry)
1840{
1841        int error;
1842        if (S_ISDIR(old_dentry->d_inode->i_mode))
1843                error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1844        else
1845                error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1846        if (!error) {
1847                if (old_dir == new_dir)
1848                        inode_dir_notify(old_dir, DN_RENAME);
1849                else {
1850                        inode_dir_notify(old_dir, DN_DELETE);
1851                        inode_dir_notify(new_dir, DN_CREATE);
1852                }
1853        }
1854        return error;
1855}
1856
1857static inline int do_rename(const char * oldname, const char * newname)
1858{
1859        int error = 0;
1860        struct dentry * old_dir, * new_dir;
1861        struct dentry * old_dentry, *new_dentry;
1862        struct nameidata oldnd, newnd;
1863
1864        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1865        if (error)
1866                goto exit;
1867
1868        error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1869        if (error)
1870                goto exit1;
1871
1872        error = -EXDEV;
1873        if (oldnd.mnt != newnd.mnt)
1874                goto exit2;
1875
1876        old_dir = oldnd.dentry;
1877        error = -EBUSY;
1878        if (oldnd.last_type != LAST_NORM)
1879                goto exit2;
1880
1881        new_dir = newnd.dentry;
1882        if (newnd.last_type != LAST_NORM)
1883                goto exit2;
1884
1885        double_lock(new_dir, old_dir);
1886
1887        old_dentry = lookup_hash(&oldnd.last, old_dir);
1888        error = PTR_ERR(old_dentry);
1889        if (IS_ERR(old_dentry))
1890                goto exit3;
1891        /* source must exist */
1892        error = -ENOENT;
1893        if (!old_dentry->d_inode)
1894                goto exit4;
1895        /* unless the source is a directory trailing slashes give -ENOTDIR */
1896        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1897                error = -ENOTDIR;
1898                if (oldnd.last.name[oldnd.last.len])
1899                        goto exit4;
1900                if (newnd.last.name[newnd.last.len])
1901                        goto exit4;
1902        }
1903        new_dentry = lookup_hash(&newnd.last, new_dir);
1904        error = PTR_ERR(new_dentry);
1905        if (IS_ERR(new_dentry))
1906                goto exit4;
1907
1908        lock_kernel();
1909        error = vfs_rename(old_dir->d_inode, old_dentry,
1910                                   new_dir->d_inode, new_dentry);
1911        unlock_kernel();
1912
1913        dput(new_dentry);
1914exit4:
1915        dput(old_dentry);
1916exit3:
1917        double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1918exit2:
1919        path_release(&newnd);
1920exit1:
1921        path_release(&oldnd);
1922exit:
1923        return error;
1924}
1925
1926asmlinkage long sys_rename(const char * oldname, const char * newname)
1927{
1928        int error;
1929        char * from;
1930        char * to;
1931
1932        from = getname(oldname);
1933        if(IS_ERR(from))
1934                return PTR_ERR(from);
1935        to = getname(newname);
1936        error = PTR_ERR(to);
1937        if (!IS_ERR(to)) {
1938                error = do_rename(from,to);
1939                putname(to);
1940        }
1941        putname(from);
1942        return error;
1943}
1944
1945int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1946{
1947        int len;
1948
1949        len = PTR_ERR(link);
1950        if (IS_ERR(link))
1951                goto out;
1952
1953        len = strlen(link);
1954        if (len > (unsigned) buflen)
1955                len = buflen;
1956        if (copy_to_user(buffer, link, len))
1957                len = -EFAULT;
1958out:
1959        return len;
1960}
1961
1962static inline int
1963__vfs_follow_link(struct nameidata *nd, const char *link)
1964{
1965        int res = 0;
1966        char *name;
1967        if (IS_ERR(link))
1968                goto fail;
1969
1970        if (*link == '/') {
1971                path_release(nd);
1972                if (!walk_init_root(link, nd))
1973                        /* weird __emul_prefix() stuff did it */
1974                        goto out;
1975        }
1976        res = link_path_walk(link, nd);
1977out:
1978        if (current->link_count || res || nd->last_type!=LAST_NORM)
1979                return res;
1980        /*
1981         * If it is an iterative symlinks resolution in open_namei() we
1982         * have to copy the last component. And all that crap because of
1983         * bloody create() on broken symlinks. Furrfu...
1984         */
1985        name = __getname();
1986        if (!name) {
1987                path_release(nd);
1988                return -ENOMEM;
1989        }
1990        strcpy(name, nd->last.name);
1991        nd->last.name = name;
1992        return 0;
1993fail:
1994        path_release(nd);
1995        return PTR_ERR(link);
1996}
1997
/*
 * vfs_follow_link - non-inline entry point for __vfs_follow_link().
 *
 * Passes @nd and @link straight through and returns its result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
        int res = __vfs_follow_link(nd, link);

        return res;
}
2002
2003/* get the link contents into pagecache */
2004static char *page_getlink(struct dentry * dentry, struct page **ppage)
2005{
2006        struct page * page;
2007        struct address_space *mapping = dentry->d_inode->i_mapping;
2008        page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2009                                NULL);
2010        if (IS_ERR(page))
2011                goto sync_fail;
2012        wait_on_page(page);
2013        if (!Page_Uptodate(page))
2014                goto async_fail;
2015        *ppage = page;
2016        return kmap(page);
2017
2018async_fail:
2019        page_cache_release(page);
2020        return ERR_PTR(-EIO);
2021
2022sync_fail:
2023        return (char*)page;
2024}
2025
2026int page_readlink(struct dentry *dentry, char *buffer, int buflen)
2027{
2028        struct page *page = NULL;
2029        char *s = page_getlink(dentry, &page);
2030        int res = vfs_readlink(dentry,buffer,buflen,s);
2031        if (page) {
2032                kunmap(page);
2033                page_cache_release(page);
2034        }
2035        return res;
2036}
2037
2038int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2039{
2040        struct page *page = NULL;
2041        char *s = page_getlink(dentry, &page);
2042        int res = __vfs_follow_link(nd, s);
2043        if (page) {
2044                kunmap(page);
2045                page_cache_release(page);
2046        }
2047        return res;
2048}
2049
2050struct inode_operations page_symlink_inode_operations = {
2051        readlink:       page_readlink,
2052        follow_link:    page_follow_link,
2053};
2054
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.