linux-old/fs/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/namei.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 * Some corrections by tytso.
   9 */
  10
  11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12 * lookup logic.
  13 */
  14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15 */
  16
  17#include <linux/init.h>
  18#include <linux/slab.h>
  19#include <linux/fs.h>
  20#include <linux/quotaops.h>
  21#include <linux/pagemap.h>
  22#include <linux/dnotify.h>
  23#include <linux/smp_lock.h>
  24#include <linux/personality.h>
  25
  26#include <asm/namei.h>
  27#include <asm/uaccess.h>
  28
   /*
    * ACC_MODE(x): use the O_ACCMODE bits of x as an index into a small
    * byte table; indices 0, 1, 2 and 3 yield 0, 4, 2 and 6 respectively.
    * NOTE(review): the table values are presumably MAY_* permission
    * masks for the corresponding access mode - confirm against callers.
    */
   29#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  30
  31/* [Feb-1997 T. Schoebel-Theuer]
  32 * Fundamental changes in the pathname lookup mechanisms (namei)
  33 * were necessary because of omirr.  The reason is that omirr needs
  34 * to know the _real_ pathname, not the user-supplied one, in case
  35 * of symlinks (and also when transname replacements occur).
  36 *
  37 * The new code replaces the old recursive symlink resolution with
  38 * an iterative one (in case of non-nested symlink chains).  It does
  39 * this with calls to <fs>_follow_link().
  40 * As a side effect, dir_namei(), _namei() and follow_link() are now 
  41 * replaced with a single function lookup_dentry() that can handle all 
  42 * the special cases of the former code.
  43 *
  44 * With the new dcache, the pathname is stored at each inode, at least as
  45 * long as the refcount of the inode is positive.  As a side effect, the
  46 * size of the dcache depends on the inode cache and thus is dynamic.
  47 *
  48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  49 * resolution to correspond with current state of the code.
  50 *
  51 * Note that the symlink resolution is not *completely* iterative.
  52 * There is still a significant amount of tail- and mid- recursion in
  53 * the algorithm.  Also, note that <fs>_readlink() is not used in
  54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  55 * may return different results than <fs>_follow_link().  Many virtual
  56 * filesystems (including /proc) exhibit this behavior.
  57 */
  58
  59/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  60 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  61 * and the name already exists in form of a symlink, try to create the new
  62 * name indicated by the symlink. The old code always complained that the
  63 * name already exists, due to not following the symlink even if its target
  64 * is nonexistent.  The new semantics affects also mknod() and link() when
   65 * the name is a symlink pointing to a non-existent name.
  66 *
  67 * I don't know which semantics is the right one, since I have no access
  68 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  69 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  70 * "old" one. Personally, I think the new semantics is much more logical.
  71 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  72 * file does succeed in both HP-UX and SunOs, but not in Solaris
  73 * and in the old Linux semantics.
  74 */
  75
  76/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  77 * semantics.  See the comments in "open_namei" and "do_link" below.
  78 *
  79 * [10-Sep-98 Alan Modra] Another symlink change.
  80 */
  81
  82/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  83 *      inside the path - always follow.
  84 *      in the last component in creation/removal/renaming - never follow.
  85 *      if LOOKUP_FOLLOW passed - follow.
  86 *      if the pathname has trailing slashes - follow.
  87 *      otherwise - don't follow.
  88 * (applied in that order).
  89 *
  90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
  91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
  92 * During the 2.4 we need to fix the userland stuff depending on it -
  93 * hopefully we will be able to get rid of that wart in 2.5. So far only
  94 * XEmacs seems to be relying on it...
  95 */
  96
  97/* In order to reduce some races, while at the same time doing additional
  98 * checking and hopefully speeding things up, we copy filenames to the
  99 * kernel data space before using them..
 100 *
 101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 102 * PATH_MAX includes the nul terminator --RR.
 103 */
 104static inline int do_getname(const char *filename, char *page)
 105{
 106        int retval;
 107        unsigned long len = PATH_MAX;
 108
 109        if ((unsigned long) filename >= TASK_SIZE) {
 110                if (!segment_eq(get_fs(), KERNEL_DS))
 111                        return -EFAULT;
 112        } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
 113                len = TASK_SIZE - (unsigned long) filename;
 114
 115        retval = strncpy_from_user((char *)page, filename, len);
 116        if (retval > 0) {
 117                if (retval < len)
 118                        return 0;
 119                return -ENAMETOOLONG;
 120        } else if (!retval)
 121                retval = -ENOENT;
 122        return retval;
 123}
 124
 125char * getname(const char * filename)
 126{
 127        char *tmp, *result;
 128
 129        result = ERR_PTR(-ENOMEM);
 130        tmp = __getname();
 131        if (tmp)  {
 132                int retval = do_getname(filename, tmp);
 133
 134                result = tmp;
 135                if (retval < 0) {
 136                        putname(tmp);
 137                        result = ERR_PTR(retval);
 138                }
 139        }
 140        return result;
 141}
 142
 143/*
 144 *      vfs_permission()
 145 *
 146 * is used to check for read/write/execute permissions on a file.
 147 * We use "fsuid" for this, letting us set arbitrary permissions
 148 * for filesystem access without changing the "normal" uids which
 149 * are used for other things..
 150 */
 151int vfs_permission(struct inode * inode, int mask)
 152{
 153        umode_t                 mode = inode->i_mode;
 154
 155        if (mask & MAY_WRITE) {
 156                /*
 157                 * Nobody gets write access to a read-only fs.
 158                 */
 159                if (IS_RDONLY(inode) &&
 160                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 161                        return -EROFS;
 162
 163                /*
 164                 * Nobody gets write access to an immutable file.
 165                 */
 166                if (IS_IMMUTABLE(inode))
 167                        return -EACCES;
 168        }
 169
 170        if (current->fsuid == inode->i_uid)
 171                mode >>= 6;
 172        else if (in_group_p(inode->i_gid))
 173                mode >>= 3;
 174
 175        /*
 176         * If the DACs are ok we don't need any capability check.
 177         */
 178        if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
 179                return 0;
 180
 181        /*
 182         * Read/write DACs are always overridable.
 183         * Executable DACs are overridable if at least one exec bit is set.
 184         */
 185        if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
 186                if (capable(CAP_DAC_OVERRIDE))
 187                        return 0;
 188
 189        /*
 190         * Searching includes executable on directories, else just read.
 191         */
 192        if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 193                if (capable(CAP_DAC_READ_SEARCH))
 194                        return 0;
 195
 196        return -EACCES;
 197}
 198
 199int permission(struct inode * inode,int mask)
 200{
 201        if (inode->i_op && inode->i_op->permission) {
 202                int retval;
 203                lock_kernel();
 204                retval = inode->i_op->permission(inode, mask);
 205                unlock_kernel();
 206                return retval;
 207        }
 208        return vfs_permission(inode, mask);
 209}
 210
 211/*
 212 * get_write_access() gets write permission for a file.
 213 * put_write_access() releases this write permission.
 214 * This is used for regular files.
 215 * We cannot support write (and maybe mmap read-write shared) accesses and
 216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 217 * can have the following values:
 218 * 0: no writers, no VM_DENYWRITE mappings
 219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 220 * > 0: (i_writecount) users are writing to the file.
 221 *
 222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
 223 * except for the cases where we don't hold i_writecount yet. Then we need to
 224 * use {get,deny}_write_access() - these functions check the sign and refuse
 225 * to do the change if sign is wrong. Exclusion between them is provided by
 226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
 227 * who will try to move it in struct inode - just leave it here.
 228 */
 229static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
 230int get_write_access(struct inode * inode)
 231{
 232        spin_lock(&arbitration_lock);
 233        if (atomic_read(&inode->i_writecount) < 0) {
 234                spin_unlock(&arbitration_lock);
 235                return -ETXTBSY;
 236        }
 237        atomic_inc(&inode->i_writecount);
 238        spin_unlock(&arbitration_lock);
 239        return 0;
 240}
 241int deny_write_access(struct file * file)
 242{
 243        spin_lock(&arbitration_lock);
 244        if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
 245                spin_unlock(&arbitration_lock);
 246                return -ETXTBSY;
 247        }
 248        atomic_dec(&file->f_dentry->d_inode->i_writecount);
 249        spin_unlock(&arbitration_lock);
 250        return 0;
 251}
 252
 253void path_release(struct nameidata *nd)
 254{
 255        dput(nd->dentry);
 256        mntput(nd->mnt);
 257}
 258
 259/*
 260 * Internal lookup() using the new generic dcache.
 261 * SMP-safe
 262 */
 263static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 264{
 265        struct dentry * dentry = d_lookup(parent, name);
 266
 267        if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 268                if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 269                        dput(dentry);
 270                        dentry = NULL;
 271                }
 272        }
 273        return dentry;
 274}
 275
 276/*
 277 * This is called when everything else fails, and we actually have
 278 * to go to the low-level filesystem to find out what we should do..
 279 *
 280 * We get the directory semaphore, and after getting that we also
 281 * make sure that nobody added the entry to the dcache in the meantime..
 282 * SMP-safe
 283 */
 284static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 285{
 286        struct dentry * result;
 287        struct inode *dir = parent->d_inode;
 288
 289        down(&dir->i_sem);
 290        /*
 291         * First re-do the cached lookup just in case it was created
 292         * while we waited for the directory semaphore..
 293         *
 294         * FIXME! This could use version numbering or similar to
 295         * avoid unnecessary cache lookups.
 296         */
 297        result = d_lookup(parent, name);
 298        if (!result) {
 299                struct dentry * dentry = d_alloc(parent, name);
 300                result = ERR_PTR(-ENOMEM);
 301                if (dentry) {
 302                        lock_kernel();
 303                        result = dir->i_op->lookup(dir, dentry);
 304                        unlock_kernel();
 305                        if (result)
 306                                dput(dentry);
 307                        else
 308                                result = dentry;
 309                }
 310                up(&dir->i_sem);
 311                return result;
 312        }
 313
 314        /*
 315         * Uhhuh! Nasty case: the cache was re-populated while
 316         * we waited on the semaphore. Need to revalidate.
 317         */
 318        up(&dir->i_sem);
 319        if (result->d_op && result->d_op->d_revalidate) {
 320                if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
 321                        dput(result);
 322                        result = ERR_PTR(-ENOENT);
 323                }
 324        }
 325        return result;
 326}
 327
 328/*
 329 * This limits recursive symlink follows to 8, while
 330 * limiting consecutive symlinks to 40.
 331 *
 332 * Without that kind of total limit, nasty chains of consecutive
 333 * symlinks can cause almost arbitrarily long lookups. 
 334 */
 335static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 336{
 337        int err;
 338        if (current->link_count >= 5)
 339                goto loop;
 340        if (current->total_link_count >= 40)
 341                goto loop;
 342        if (current->need_resched) {
 343                current->state = TASK_RUNNING;
 344                schedule();
 345        }
 346        current->link_count++;
 347        current->total_link_count++;
 348        UPDATE_ATIME(dentry->d_inode);
 349        err = dentry->d_inode->i_op->follow_link(dentry, nd);
 350        current->link_count--;
 351        return err;
 352loop:
 353        path_release(nd);
 354        return -ELOOP;
 355}
 356
 357static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
 358{
 359        struct vfsmount *parent;
 360        struct dentry *dentry;
 361        spin_lock(&dcache_lock);
 362        parent=(*mnt)->mnt_parent;
 363        if (parent == *mnt) {
 364                spin_unlock(&dcache_lock);
 365                return 0;
 366        }
 367        mntget(parent);
 368        dentry=dget((*mnt)->mnt_mountpoint);
 369        spin_unlock(&dcache_lock);
 370        dput(*base);
 371        *base = dentry;
 372        mntput(*mnt);
 373        *mnt = parent;
 374        return 1;
 375}
 376
 377int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 378{
 379        return __follow_up(mnt, dentry);
 380}
 381
 382static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 383{
 384        struct vfsmount *mounted;
 385
 386        spin_lock(&dcache_lock);
 387        mounted = lookup_mnt(*mnt, *dentry);
 388        if (mounted) {
 389                *mnt = mntget(mounted);
 390                spin_unlock(&dcache_lock);
 391                dput(*dentry);
 392                mntput(mounted->mnt_parent);
 393                *dentry = dget(mounted->mnt_root);
 394                return 1;
 395        }
 396        spin_unlock(&dcache_lock);
 397        return 0;
 398}
 399
 400int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 401{
 402        return __follow_down(mnt,dentry);
 403}
 404 
 405static inline void follow_dotdot(struct nameidata *nd)
 406{
 407        while(1) {
 408                struct vfsmount *parent;
 409                struct dentry *dentry;
 410                read_lock(&current->fs->lock);
 411                if (nd->dentry == current->fs->root &&
 412                    nd->mnt == current->fs->rootmnt)  {
 413                        read_unlock(&current->fs->lock);
 414                        break;
 415                }
 416                read_unlock(&current->fs->lock);
 417                spin_lock(&dcache_lock);
 418                if (nd->dentry != nd->mnt->mnt_root) {
 419                        dentry = dget(nd->dentry->d_parent);
 420                        spin_unlock(&dcache_lock);
 421                        dput(nd->dentry);
 422                        nd->dentry = dentry;
 423                        break;
 424                }
 425                parent=nd->mnt->mnt_parent;
 426                if (parent == nd->mnt) {
 427                        spin_unlock(&dcache_lock);
 428                        break;
 429                }
 430                mntget(parent);
 431                dentry=dget(nd->mnt->mnt_mountpoint);
 432                spin_unlock(&dcache_lock);
 433                dput(nd->dentry);
 434                nd->dentry = dentry;
 435                mntput(nd->mnt);
 436                nd->mnt = parent;
 437        }
 438        while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
 439                ;
 440}
 441
  442/*
  443 * Name resolution.
  444 *
  445 * This is the basic name resolution function, turning a pathname
  446 * into the final dentry.
  447 *
  448 * We expect 'base' to be positive and a directory.
      *
      * The walk starts at nd->dentry / nd->mnt and consumes 'name' one
      * component at a time.  Returns 0 on success with the result left
      * in nd.  Error paths that leave the loop with 'break' drop nd via
      * path_release() before the negative errno is returned; a failed
      * do_follow_link() returns through return_err, which does not call
      * path_release() here.
      *
      * If the flags contain LOOKUP_PARENT the last component is not
      * looked up: it is stored in nd->last and classified in
      * nd->last_type, and nd stays at the directory reached so far.
  449 */
  450int link_path_walk(const char * name, struct nameidata *nd)
  451{
  452        struct dentry *dentry;
  453        struct inode *inode;
  454        int err;
  455        unsigned int lookup_flags = nd->flags;
  456
             /* Leading slashes are skipped; nothing left means nd itself is the answer. */
  457        while (*name=='/')
  458                name++;
  459        if (!*name)
  460                goto return_reval;
  461
  462        inode = nd->dentry->d_inode;
             /*
              * link_count is non-zero only while do_follow_link() is running a
              * follow_link() method; in that case the caller's flags are
              * replaced by plain LOOKUP_FOLLOW.
              */
  463        if (current->link_count)
  464                lookup_flags = LOOKUP_FOLLOW;
  465
  466        /* At this point we know we have a real path component. */
  467        for(;;) {
  468                unsigned long hash;
  469                struct qstr this;
  470                unsigned int c;
  471
                     /* The directory being searched must grant MAY_EXEC. */
  472                err = permission(inode, MAY_EXEC);
  473                dentry = ERR_PTR(err);
  474                if (err)
  475                        break;
  476
                     /* Hash the component up to the next '/' or the end of the string. */
  477                this.name = name;
  478                c = *(const unsigned char *)name;
  479
  480                hash = init_name_hash();
  481                do {
  482                        name++;
  483                        hash = partial_name_hash(c, hash);
  484                        c = *(const unsigned char *)name;
  485                } while (c && (c != '/'));
  486                this.len = name - (const char *) this.name;
  487                this.hash = end_name_hash(hash);
  488
  489                /* remove trailing slashes? */
  490                if (!c)
  491                        goto last_component;
  492                while (*++name == '/');
  493                if (!*name)
  494                        goto last_with_slashes;
  495
  496                /*
  497                 * "." and ".." are special - ".." especially so because it has
  498                 * to be able to know about the current root directory and
  499                 * parent relationships.
  500                 */
  501                if (this.name[0] == '.') switch (this.len) {
  502                        default:
  503                                break;
  504                        case 2: 
  505                                if (this.name[1] != '.')
  506                                        break;
  507                                follow_dotdot(nd);
  508                                inode = nd->dentry->d_inode;
  509                                /* fallthrough */
  510                        case 1:
  511                                continue;
  512                }
  513                /*
  514                 * See if the low-level filesystem might want
  515                 * to use its own hash..
  516                 */
  517                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
  518                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
  519                        if (err < 0)
  520                                break;
  521                }
  522                /* This does the actual lookups.. */
  523                dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
  524                if (!dentry) {
  525                        dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
  526                        err = PTR_ERR(dentry);
  527                        if (IS_ERR(dentry))
  528                                break;
  529                }
  530                /* Check mountpoints.. */
  531                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
  532                        ;
  533
                     /* An intermediate component must exist (-ENOENT) and have i_op (-ENOTDIR). */
  534                err = -ENOENT;
  535                inode = dentry->d_inode;
  536                if (!inode)
  537                        goto out_dput;
  538                err = -ENOTDIR; 
  539                if (!inode->i_op)
  540                        goto out_dput;
  541
  542                if (inode->i_op->follow_link) {
  543                        err = do_follow_link(dentry, nd);
  544                        dput(dentry);
                             /*
                              * return_err skips path_release(): do_follow_link()
                              * already released nd on its -ELOOP path.
                              * NOTE(review): a failing ->follow_link() is presumably
                              * expected to release nd as well - confirm.
                              */
  545                        if (err)
  546                                goto return_err;
  547                        err = -ENOENT;
  548                        inode = nd->dentry->d_inode;
  549                        if (!inode)
  550                                break;
  551                        err = -ENOTDIR; 
  552                        if (!inode->i_op)
  553                                break;
  554                } else {
                             /* Not a symlink: step nd down into the dentry just found. */
  555                        dput(nd->dentry);
  556                        nd->dentry = dentry;
  557                }
  558                err = -ENOTDIR; 
  559                if (!inode->i_op->lookup)
  560                        break;
  561                continue;
  562                /* here ends the main loop */
  563
  564last_with_slashes:
                     /* Trailing slashes: force symlink following and require a directory. */
  565                lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
  566last_component:
  567                if (lookup_flags & LOOKUP_PARENT)
  568                        goto lookup_parent;
  569                if (this.name[0] == '.') switch (this.len) {
  570                        default:
  571                                break;
  572                        case 2: 
  573                                if (this.name[1] != '.')
  574                                        break;
  575                                follow_dotdot(nd);
  576                                inode = nd->dentry->d_inode;
  577                                /* fallthrough */
  578                        case 1:
  579                                goto return_reval;
  580                }
  581                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
  582                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
  583                        if (err < 0)
  584                                break;
  585                }
  586                dentry = cached_lookup(nd->dentry, &this, 0);
  587                if (!dentry) {
  588                        dentry = real_lookup(nd->dentry, &this, 0);
  589                        err = PTR_ERR(dentry);
  590                        if (IS_ERR(dentry))
  591                                break;
  592                }
  593                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
  594                        ;
  595                inode = dentry->d_inode;
                     /* The final component is followed only when LOOKUP_FOLLOW is set. */
  596                if ((lookup_flags & LOOKUP_FOLLOW)
  597                    && inode && inode->i_op && inode->i_op->follow_link) {
  598                        err = do_follow_link(dentry, nd);
  599                        dput(dentry);
  600                        if (err)
  601                                goto return_err;
  602                        inode = nd->dentry->d_inode;
  603                } else {
  604                        dput(nd->dentry);
  605                        nd->dentry = dentry;
  606                }
  607                err = -ENOENT;
  608                if (!inode)
  609                        goto no_inode;
  610                if (lookup_flags & LOOKUP_DIRECTORY) {
  611                        err = -ENOTDIR; 
  612                        if (!inode->i_op || !inode->i_op->lookup)
  613                                break;
  614                }
  615                goto return_base;
  616no_inode:
                     /* A negative result is an error only if POSITIVE or DIRECTORY was asked for. */
  617                err = -ENOENT;
  618                if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
  619                        break;
  620                goto return_base;
  621lookup_parent:
                     /* Record the last component; names other than "." and ".." stay LAST_NORM. */
  622                nd->last = this;
  623                nd->last_type = LAST_NORM;
  624                if (this.name[0] != '.')
  625                        goto return_base;
  626                if (this.len == 1)
  627                        nd->last_type = LAST_DOT;
  628                else if (this.len == 2 && this.name[1] == '.')
  629                        nd->last_type = LAST_DOTDOT;
  630return_reval:
  631                /*
  632                 * We bypassed the ordinary revalidation routines.
  633                 * Check the cached dentry for staleness.
  634                 */
  635                dentry = nd->dentry;
  636                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
  637                        err = -ESTALE;
  638                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
  639                                d_invalidate(dentry);
  640                                break;
  641                        }
  642                }
  643return_base:
  644                return 0;
  645out_dput:
  646                dput(dentry);
  647                break;
  648        }
             /* Every 'break' above lands here: drop nd and report err. */
  649        path_release(nd);
  650return_err:
  651        return err;
  652}
 653
 654int path_walk(const char * name, struct nameidata *nd)
 655{
 656        current->total_link_count = 0;
 657        return link_path_walk(name, nd);
 658}
 659
 660/* SMP-safe */
 661/* returns 1 if everything is done */
 662static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 663{
 664        if (path_walk(name, nd))
 665                return 0;               /* something went wrong... */
 666
 667        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
 668                struct nameidata nd_root;
 669                /*
 670                 * NAME was not found in alternate root or it's a directory.  Try to find
 671                 * it in the normal root:
 672                 */
 673                nd_root.last_type = LAST_ROOT;
 674                nd_root.flags = nd->flags;
 675                read_lock(&current->fs->lock);
 676                nd_root.mnt = mntget(current->fs->rootmnt);
 677                nd_root.dentry = dget(current->fs->root);
 678                read_unlock(&current->fs->lock);
 679                if (path_walk(name, &nd_root))
 680                        return 1;
 681                if (nd_root.dentry->d_inode) {
 682                        path_release(nd);
 683                        nd->dentry = nd_root.dentry;
 684                        nd->mnt = nd_root.mnt;
 685                        nd->last = nd_root.last;
 686                        return 1;
 687                }
 688                path_release(&nd_root);
 689        }
 690        return 1;
 691}
 692
 693void set_fs_altroot(void)
 694{
 695        char *emul = __emul_prefix();
 696        struct nameidata nd;
 697        struct vfsmount *mnt = NULL, *oldmnt;
 698        struct dentry *dentry = NULL, *olddentry;
 699        if (emul) {
 700                read_lock(&current->fs->lock);
 701                nd.mnt = mntget(current->fs->rootmnt);
 702                nd.dentry = dget(current->fs->root);
 703                read_unlock(&current->fs->lock);
 704                nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
 705                if (path_walk(emul,&nd) == 0) {
 706                        mnt = nd.mnt;
 707                        dentry = nd.dentry;
 708                }
 709        }
 710        write_lock(&current->fs->lock);
 711        oldmnt = current->fs->altrootmnt;
 712        olddentry = current->fs->altroot;
 713        current->fs->altrootmnt = mnt;
 714        current->fs->altroot = dentry;
 715        write_unlock(&current->fs->lock);
 716        if (olddentry) {
 717                dput(olddentry);
 718                mntput(oldmnt);
 719        }
 720}
 721
 722/* SMP-safe */
 723static inline int
 724walk_init_root(const char *name, struct nameidata *nd)
 725{
 726        read_lock(&current->fs->lock);
 727        if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 728                nd->mnt = mntget(current->fs->altrootmnt);
 729                nd->dentry = dget(current->fs->altroot);
 730                read_unlock(&current->fs->lock);
 731                if (__emul_lookup_dentry(name,nd))
 732                        return 0;
 733                read_lock(&current->fs->lock);
 734        }
 735        nd->mnt = mntget(current->fs->rootmnt);
 736        nd->dentry = dget(current->fs->root);
 737        read_unlock(&current->fs->lock);
 738        return 1;
 739}
 740
 741/* SMP-safe */
 742int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
 743{
 744        int error = 0;
 745        if (path_init(path, flags, nd))
 746                error = path_walk(path, nd);
 747        return error;
 748}
 749
 750
 751/* SMP-safe */
 752int path_init(const char *name, unsigned int flags, struct nameidata *nd)
 753{
 754        nd->last_type = LAST_ROOT; /* if there are only slashes... */
 755        nd->flags = flags;
 756        if (*name=='/')
 757                return walk_init_root(name,nd);
 758        read_lock(&current->fs->lock);
 759        nd->mnt = mntget(current->fs->pwdmnt);
 760        nd->dentry = dget(current->fs->pwd);
 761        read_unlock(&current->fs->lock);
 762        return 1;
 763}
 764
 765/*
 766 * Restricted form of lookup. Doesn't follow links, single-component only,
 767 * needs parent already locked. Doesn't follow mounts.
 768 * SMP-safe.
 769 */
 770struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 771{
 772        struct dentry * dentry;
 773        struct inode *inode;
 774        int err;
 775
 776        inode = base->d_inode;
 777        err = permission(inode, MAY_EXEC);
 778        dentry = ERR_PTR(err);
 779        if (err)
 780                goto out;
 781
 782        /*
 783         * See if the low-level filesystem might want
 784         * to use its own hash..
 785         */
 786        if (base->d_op && base->d_op->d_hash) {
 787                err = base->d_op->d_hash(base, name);
 788                dentry = ERR_PTR(err);
 789                if (err < 0)
 790                        goto out;
 791        }
 792
 793        dentry = cached_lookup(base, name, 0);
 794        if (!dentry) {
 795                struct dentry *new = d_alloc(base, name);
 796                dentry = ERR_PTR(-ENOMEM);
 797                if (!new)
 798                        goto out;
 799                lock_kernel();
 800                dentry = inode->i_op->lookup(inode, new);
 801                unlock_kernel();
 802                if (!dentry)
 803                        dentry = new;
 804                else
 805                        dput(new);
 806        }
 807out:
 808        return dentry;
 809}
 810
 811/* SMP-safe */
 812struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
 813{
 814        unsigned long hash;
 815        struct qstr this;
 816        unsigned int c;
 817
 818        this.name = name;
 819        this.len = len;
 820        if (!len)
 821                goto access;
 822
 823        hash = init_name_hash();
 824        while (len--) {
 825                c = *(const unsigned char *)name++;
 826                if (c == '/' || c == '\0')
 827                        goto access;
 828                hash = partial_name_hash(c, hash);
 829        }
 830        this.hash = end_name_hash(hash);
 831
 832        return lookup_hash(&this, base);
 833access:
 834        return ERR_PTR(-EACCES);
 835}
 836
 837/*
 838 *      namei()
 839 *
 840 * is used by most simple commands to get the inode of a specified name.
 841 * Open, link etc use their own routines, but this is enough for things
 842 * like 'chmod' etc.
 843 *
 844 * namei exists in two versions: namei/lnamei. The only difference is
 845 * that namei follows links, while lnamei does not.
 846 * SMP-safe
 847 */
 848int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
 849{
 850        char *tmp;
 851        int err;
 852
 853        tmp = getname(name);
 854        err = PTR_ERR(tmp);
 855        if (!IS_ERR(tmp)) {
 856                err = 0;
 857                err = path_lookup(tmp, flags, nd);
 858                putname(tmp);
 859        }
 860        return err;
 861}
 862
 863/*
 864 * It's inline, so penalty for filesystems that don't use sticky bit is
 865 * minimal.
 866 */
 867static inline int check_sticky(struct inode *dir, struct inode *inode)
 868{
 869        if (!(dir->i_mode & S_ISVTX))
 870                return 0;
 871        if (inode->i_uid == current->fsuid)
 872                return 0;
 873        if (dir->i_uid == current->fsuid)
 874                return 0;
 875        return !capable(CAP_FOWNER);
 876}
 877
 878/*
 879 *      Check whether we can remove a link victim from directory dir, check
 880 *  whether the type of victim is right.
 881 *  1. We can't do it if dir is read-only (done in permission())
 882 *  2. We should have write and exec permissions on dir
 883 *  3. We can't remove anything from append-only dir
 884 *  4. We can't do anything with immutable dir (done in permission())
 885 *  5. If the sticky bit on dir is set we should either
 886 *      a. be owner of dir, or
 887 *      b. be owner of victim, or
 888 *      c. have CAP_FOWNER capability
 889 *  6. If the victim is append-only or immutable we can't do antyhing with
 890 *     links pointing to it.
 891 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 892 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 893 *  9. We can't remove a root or mountpoint.
 894 */
 895static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 896{
 897        int error;
 898        if (!victim->d_inode || victim->d_parent->d_inode != dir)
 899                return -ENOENT;
 900        error = permission(dir,MAY_WRITE | MAY_EXEC);
 901        if (error)
 902                return error;
 903        if (IS_APPEND(dir))
 904                return -EPERM;
 905        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 906            IS_IMMUTABLE(victim->d_inode))
 907                return -EPERM;
 908        if (isdir) {
 909                if (!S_ISDIR(victim->d_inode->i_mode))
 910                        return -ENOTDIR;
 911                if (IS_ROOT(victim))
 912                        return -EBUSY;
 913        } else if (S_ISDIR(victim->d_inode->i_mode))
 914                return -EISDIR;
 915        if (IS_DEADDIR(dir))
 916                return -ENOENT;
 917        return 0;
 918}
 919
 920/*      Check whether we can create an object with dentry child in directory
 921 *  dir.
 922 *  1. We can't do it if child already exists (open has special treatment for
 923 *     this case, but since we are inlined it's OK)
 924 *  2. We can't do it if dir is read-only (done in permission())
 925 *  3. We should have write and exec permissions on dir
 926 *  4. We can't do it if dir is immutable (done in permission())
 927 */
 928static inline int may_create(struct inode *dir, struct dentry *child) {
 929        if (child->d_inode)
 930                return -EEXIST;
 931        if (IS_DEADDIR(dir))
 932                return -ENOENT;
 933        return permission(dir,MAY_WRITE | MAY_EXEC);
 934}
 935
 936/* 
 937 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 938 * reasons.
 939 *
 940 * O_DIRECTORY translates into forcing a directory lookup.
 941 */
 942static inline int lookup_flags(unsigned int f)
 943{
 944        unsigned long retval = LOOKUP_FOLLOW;
 945
 946        if (f & O_NOFOLLOW)
 947                retval &= ~LOOKUP_FOLLOW;
 948        
 949        if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 950                retval &= ~LOOKUP_FOLLOW;
 951        
 952        if (f & O_DIRECTORY)
 953                retval |= LOOKUP_DIRECTORY;
 954
 955        return retval;
 956}
 957
 958int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 959{
 960        int error;
 961
 962        mode &= S_IALLUGO;
 963        mode |= S_IFREG;
 964
 965        down(&dir->i_zombie);
 966        error = may_create(dir, dentry);
 967        if (error)
 968                goto exit_lock;
 969
 970        error = -EACCES;        /* shouldn't it be ENOSYS? */
 971        if (!dir->i_op || !dir->i_op->create)
 972                goto exit_lock;
 973
 974        DQUOT_INIT(dir);
 975        lock_kernel();
 976        error = dir->i_op->create(dir, dentry, mode);
 977        unlock_kernel();
 978exit_lock:
 979        up(&dir->i_zombie);
 980        if (!error)
 981                inode_dir_notify(dir, DN_CREATE);
 982        return error;
 983}
 984
 985/*
 986 *      open_namei()
 987 *
 988 * namei for open - this is in fact almost the whole open-routine.
 989 *
 990 * Note that the low bits of "flag" aren't the same as in the open
 991 * system call - they are 00 - no permissions needed
 992 *                        01 - read permission needed
 993 *                        10 - write permission needed
 994 *                        11 - read/write permissions needed
 995 * which is a lot more logical, and also allows the "no perm" needed
 996 * for symlinks (where the permissions are checked later).
 997 * SMP-safe
 998 */
 999int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1000{
1001        int acc_mode, error = 0;
1002        struct inode *inode;
1003        struct dentry *dentry;
1004        struct dentry *dir;
1005        int count = 0;
1006
1007        acc_mode = ACC_MODE(flag);
1008
1009        /*
1010         * The simplest case - just a plain lookup.
1011         */
1012        if (!(flag & O_CREAT)) {
1013                error = path_lookup(pathname, lookup_flags(flag), nd);
1014                if (error)
1015                        return error;
1016                dentry = nd->dentry;
1017                goto ok;
1018        }
1019
1020        /*
1021         * Create - we need to know the parent.
1022         */
1023        error = path_lookup(pathname, LOOKUP_PARENT, nd);
1024        if (error)
1025                return error;
1026
1027        /*
1028         * We have the parent and last component. First of all, check
1029         * that we are not asked to creat(2) an obvious directory - that
1030         * will not do.
1031         */
1032        error = -EISDIR;
1033        if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1034                goto exit;
1035
1036        dir = nd->dentry;
1037        down(&dir->d_inode->i_sem);
1038        dentry = lookup_hash(&nd->last, nd->dentry);
1039
1040do_last:
1041        error = PTR_ERR(dentry);
1042        if (IS_ERR(dentry)) {
1043                up(&dir->d_inode->i_sem);
1044                goto exit;
1045        }
1046
1047        /* Negative dentry, just create the file */
1048        if (!dentry->d_inode) {
1049                error = vfs_create(dir->d_inode, dentry,
1050                                   mode & ~current->fs->umask);
1051                up(&dir->d_inode->i_sem);
1052                dput(nd->dentry);
1053                nd->dentry = dentry;
1054                if (error)
1055                        goto exit;
1056                /* Don't check for write permission, don't truncate */
1057                acc_mode = 0;
1058                flag &= ~O_TRUNC;
1059                goto ok;
1060        }
1061
1062        /*
1063         * It already exists.
1064         */
1065        up(&dir->d_inode->i_sem);
1066
1067        error = -EEXIST;
1068        if (flag & O_EXCL)
1069                goto exit_dput;
1070
1071        if (d_mountpoint(dentry)) {
1072                error = -ELOOP;
1073                if (flag & O_NOFOLLOW)
1074                        goto exit_dput;
1075                while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1076        }
1077        error = -ENOENT;
1078        if (!dentry->d_inode)
1079                goto exit_dput;
1080        if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1081                goto do_link;
1082
1083        dput(nd->dentry);
1084        nd->dentry = dentry;
1085        error = -EISDIR;
1086        if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1087                goto exit;
1088ok:
1089        error = -ENOENT;
1090        inode = dentry->d_inode;
1091        if (!inode)
1092                goto exit;
1093
1094        error = -ELOOP;
1095        if (S_ISLNK(inode->i_mode))
1096                goto exit;
1097        
1098        error = -EISDIR;
1099        if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1100                goto exit;
1101
1102        error = permission(inode,acc_mode);
1103        if (error)
1104                goto exit;
1105
1106        /*
1107         * FIFO's, sockets and device files are special: they don't
1108         * actually live on the filesystem itself, and as such you
1109         * can write to them even if the filesystem is read-only.
1110         */
1111        if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1112                flag &= ~O_TRUNC;
1113        } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1114                error = -EACCES;
1115                if (nd->mnt->mnt_flags & MNT_NODEV)
1116                        goto exit;
1117
1118                flag &= ~O_TRUNC;
1119        } else {
1120                error = -EROFS;
1121                if (IS_RDONLY(inode) && (flag & 2))
1122                        goto exit;
1123        }
1124        /*
1125         * An append-only file must be opened in append mode for writing.
1126         */
1127        error = -EPERM;
1128        if (IS_APPEND(inode)) {
1129                if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1130                        goto exit;
1131                if (flag & O_TRUNC)
1132                        goto exit;
1133        }
1134
1135        /*
1136         * Ensure there are no outstanding leases on the file.
1137         */
1138        error = get_lease(inode, flag);
1139        if (error)
1140                goto exit;
1141
1142        if (flag & O_TRUNC) {
1143                error = get_write_access(inode);
1144                if (error)
1145                        goto exit;
1146
1147                /*
1148                 * Refuse to truncate files with mandatory locks held on them.
1149                 */
1150                error = locks_verify_locked(inode);
1151                if (!error) {
1152                        DQUOT_INIT(inode);
1153                        
1154                        error = do_truncate(dentry, 0);
1155                }
1156                put_write_access(inode);
1157                if (error)
1158                        goto exit;
1159        } else
1160                if (flag & FMODE_WRITE)
1161                        DQUOT_INIT(inode);
1162
1163        return 0;
1164
1165exit_dput:
1166        dput(dentry);
1167exit:
1168        path_release(nd);
1169        return error;
1170
1171do_link:
1172        error = -ELOOP;
1173        if (flag & O_NOFOLLOW)
1174                goto exit_dput;
1175        /*
1176         * This is subtle. Instead of calling do_follow_link() we do the
1177         * thing by hands. The reason is that this way we have zero link_count
1178         * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1179         * After that we have the parent and last component, i.e.
1180         * we are in the same situation as after the first path_walk().
1181         * Well, almost - if the last component is normal we get its copy
1182         * stored in nd->last.name and we will have to putname() it when we
1183         * are done. Procfs-like symlinks just set LAST_BIND.
1184         */
1185        UPDATE_ATIME(dentry->d_inode);
1186        error = dentry->d_inode->i_op->follow_link(dentry, nd);
1187        dput(dentry);
1188        if (error)
1189                return error;
1190        if (nd->last_type == LAST_BIND) {
1191                dentry = nd->dentry;
1192                goto ok;
1193        }
1194        error = -EISDIR;
1195        if (nd->last_type != LAST_NORM)
1196                goto exit;
1197        if (nd->last.name[nd->last.len]) {
1198                putname(nd->last.name);
1199                goto exit;
1200        }
1201        error = -ELOOP;
1202        if (count++==32) {
1203                putname(nd->last.name);
1204                goto exit;
1205        }
1206        dir = nd->dentry;
1207        down(&dir->d_inode->i_sem);
1208        dentry = lookup_hash(&nd->last, nd->dentry);
1209        putname(nd->last.name);
1210        goto do_last;
1211}
1212
1213/* SMP-safe */
1214static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1215{
1216        struct dentry *dentry;
1217
1218        down(&nd->dentry->d_inode->i_sem);
1219        dentry = ERR_PTR(-EEXIST);
1220        if (nd->last_type != LAST_NORM)
1221                goto fail;
1222        dentry = lookup_hash(&nd->last, nd->dentry);
1223        if (IS_ERR(dentry))
1224                goto fail;
1225        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1226                goto enoent;
1227        return dentry;
1228enoent:
1229        dput(dentry);
1230        dentry = ERR_PTR(-ENOENT);
1231fail:
1232        return dentry;
1233}
1234
1235int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1236{
1237        int error = -EPERM;
1238
1239        down(&dir->i_zombie);
1240        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1241                goto exit_lock;
1242
1243        error = may_create(dir, dentry);
1244        if (error)
1245                goto exit_lock;
1246
1247        error = -EPERM;
1248        if (!dir->i_op || !dir->i_op->mknod)
1249                goto exit_lock;
1250
1251        DQUOT_INIT(dir);
1252        lock_kernel();
1253        error = dir->i_op->mknod(dir, dentry, mode, dev);
1254        unlock_kernel();
1255exit_lock:
1256        up(&dir->i_zombie);
1257        if (!error)
1258                inode_dir_notify(dir, DN_CREATE);
1259        return error;
1260}
1261
1262asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1263{
1264        int error = 0;
1265        char * tmp;
1266        struct dentry * dentry;
1267        struct nameidata nd;
1268
1269        if (S_ISDIR(mode))
1270                return -EPERM;
1271        tmp = getname(filename);
1272        if (IS_ERR(tmp))
1273                return PTR_ERR(tmp);
1274
1275        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1276        if (error)
1277                goto out;
1278        dentry = lookup_create(&nd, 0);
1279        error = PTR_ERR(dentry);
1280
1281        mode &= ~current->fs->umask;
1282        if (!IS_ERR(dentry)) {
1283                switch (mode & S_IFMT) {
1284                case 0: case S_IFREG:
1285                        error = vfs_create(nd.dentry->d_inode,dentry,mode);
1286                        break;
1287                case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1288                        error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1289                        break;
1290                case S_IFDIR:
1291                        error = -EPERM;
1292                        break;
1293                default:
1294                        error = -EINVAL;
1295                }
1296                dput(dentry);
1297        }
1298        up(&nd.dentry->d_inode->i_sem);
1299        path_release(&nd);
1300out:
1301        putname(tmp);
1302
1303        return error;
1304}
1305
1306int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1307{
1308        int error;
1309
1310        down(&dir->i_zombie);
1311        error = may_create(dir, dentry);
1312        if (error)
1313                goto exit_lock;
1314
1315        error = -EPERM;
1316        if (!dir->i_op || !dir->i_op->mkdir)
1317                goto exit_lock;
1318
1319        DQUOT_INIT(dir);
1320        mode &= (S_IRWXUGO|S_ISVTX);
1321        lock_kernel();
1322        error = dir->i_op->mkdir(dir, dentry, mode);
1323        unlock_kernel();
1324
1325exit_lock:
1326        up(&dir->i_zombie);
1327        if (!error)
1328                inode_dir_notify(dir, DN_CREATE);
1329        return error;
1330}
1331
1332asmlinkage long sys_mkdir(const char * pathname, int mode)
1333{
1334        int error = 0;
1335        char * tmp;
1336
1337        tmp = getname(pathname);
1338        error = PTR_ERR(tmp);
1339        if (!IS_ERR(tmp)) {
1340                struct dentry *dentry;
1341                struct nameidata nd;
1342
1343                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1344                if (error)
1345                        goto out;
1346                dentry = lookup_create(&nd, 1);
1347                error = PTR_ERR(dentry);
1348                if (!IS_ERR(dentry)) {
1349                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
1350                                          mode & ~current->fs->umask);
1351                        dput(dentry);
1352                }
1353                up(&nd.dentry->d_inode->i_sem);
1354                path_release(&nd);
1355out:
1356                putname(tmp);
1357        }
1358
1359        return error;
1360}
1361
1362/*
1363 * We try to drop the dentry early: we should have
1364 * a usage count of 2 if we're the only user of this
1365 * dentry, and if that is true (possibly after pruning
1366 * the dcache), then we drop the dentry now.
1367 *
1368 * A low-level filesystem can, if it choses, legally
1369 * do a
1370 *
1371 *      if (!d_unhashed(dentry))
1372 *              return -EBUSY;
1373 *
1374 * if it cannot handle the case of removing a directory
1375 * that is still in use by something else..
1376 */
1377static void d_unhash(struct dentry *dentry)
1378{
1379        dget(dentry);
1380        spin_lock(&dcache_lock);
1381        switch (atomic_read(&dentry->d_count)) {
1382        default:
1383                spin_unlock(&dcache_lock);
1384                shrink_dcache_parent(dentry);
1385                spin_lock(&dcache_lock);
1386                if (atomic_read(&dentry->d_count) != 2)
1387                        break;
1388        case 2:
1389                list_del_init(&dentry->d_hash);
1390        }
1391        spin_unlock(&dcache_lock);
1392}
1393
1394int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1395{
1396        int error;
1397
1398        error = may_delete(dir, dentry, 1);
1399        if (error)
1400                return error;
1401
1402        if (!dir->i_op || !dir->i_op->rmdir)
1403                return -EPERM;
1404
1405        DQUOT_INIT(dir);
1406
1407        double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1408        d_unhash(dentry);
1409        if (d_mountpoint(dentry))
1410                error = -EBUSY;
1411        else {
1412                lock_kernel();
1413                error = dir->i_op->rmdir(dir, dentry);
1414                unlock_kernel();
1415                if (!error)
1416                        dentry->d_inode->i_flags |= S_DEAD;
1417        }
1418        double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1419        if (!error) {
1420                inode_dir_notify(dir, DN_DELETE);
1421                d_delete(dentry);
1422        }
1423        dput(dentry);
1424
1425        return error;
1426}
1427
1428asmlinkage long sys_rmdir(const char * pathname)
1429{
1430        int error = 0;
1431        char * name;
1432        struct dentry *dentry;
1433        struct nameidata nd;
1434
1435        name = getname(pathname);
1436        if(IS_ERR(name))
1437                return PTR_ERR(name);
1438
1439        error = path_lookup(name, LOOKUP_PARENT, &nd);
1440        if (error)
1441                goto exit;
1442
1443        switch(nd.last_type) {
1444                case LAST_DOTDOT:
1445                        error = -ENOTEMPTY;
1446                        goto exit1;
1447                case LAST_DOT:
1448                        error = -EINVAL;
1449                        goto exit1;
1450                case LAST_ROOT:
1451                        error = -EBUSY;
1452                        goto exit1;
1453        }
1454        down(&nd.dentry->d_inode->i_sem);
1455        dentry = lookup_hash(&nd.last, nd.dentry);
1456        error = PTR_ERR(dentry);
1457        if (!IS_ERR(dentry)) {
1458                error = vfs_rmdir(nd.dentry->d_inode, dentry);
1459                dput(dentry);
1460        }
1461        up(&nd.dentry->d_inode->i_sem);
1462exit1:
1463        path_release(&nd);
1464exit:
1465        putname(name);
1466        return error;
1467}
1468
1469int vfs_unlink(struct inode *dir, struct dentry *dentry)
1470{
1471        int error;
1472
1473        down(&dir->i_zombie);
1474        error = may_delete(dir, dentry, 0);
1475        if (!error) {
1476                error = -EPERM;
1477                if (dir->i_op && dir->i_op->unlink) {
1478                        DQUOT_INIT(dir);
1479                        if (d_mountpoint(dentry))
1480                                error = -EBUSY;
1481                        else {
1482                                lock_kernel();
1483                                error = dir->i_op->unlink(dir, dentry);
1484                                unlock_kernel();
1485                                if (!error)
1486                                        d_delete(dentry);
1487                        }
1488                }
1489        }
1490        up(&dir->i_zombie);
1491        if (!error)
1492                inode_dir_notify(dir, DN_DELETE);
1493        return error;
1494}
1495
1496asmlinkage long sys_unlink(const char * pathname)
1497{
1498        int error = 0;
1499        char * name;
1500        struct dentry *dentry;
1501        struct nameidata nd;
1502
1503        name = getname(pathname);
1504        if(IS_ERR(name))
1505                return PTR_ERR(name);
1506
1507        error = path_lookup(name, LOOKUP_PARENT, &nd);
1508        if (error)
1509                goto exit;
1510        error = -EISDIR;
1511        if (nd.last_type != LAST_NORM)
1512                goto exit1;
1513        down(&nd.dentry->d_inode->i_sem);
1514        dentry = lookup_hash(&nd.last, nd.dentry);
1515        error = PTR_ERR(dentry);
1516        if (!IS_ERR(dentry)) {
1517                /* Why not before? Because we want correct error value */
1518                if (nd.last.name[nd.last.len])
1519                        goto slashes;
1520                error = vfs_unlink(nd.dentry->d_inode, dentry);
1521        exit2:
1522                dput(dentry);
1523        }
1524        up(&nd.dentry->d_inode->i_sem);
1525exit1:
1526        path_release(&nd);
1527exit:
1528        putname(name);
1529
1530        return error;
1531
1532slashes:
1533        error = !dentry->d_inode ? -ENOENT :
1534                S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1535        goto exit2;
1536}
1537
1538int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1539{
1540        int error;
1541
1542        down(&dir->i_zombie);
1543        error = may_create(dir, dentry);
1544        if (error)
1545                goto exit_lock;
1546
1547        error = -EPERM;
1548        if (!dir->i_op || !dir->i_op->symlink)
1549                goto exit_lock;
1550
1551        DQUOT_INIT(dir);
1552        lock_kernel();
1553        error = dir->i_op->symlink(dir, dentry, oldname);
1554        unlock_kernel();
1555
1556exit_lock:
1557        up(&dir->i_zombie);
1558        if (!error)
1559                inode_dir_notify(dir, DN_CREATE);
1560        return error;
1561}
1562
1563asmlinkage long sys_symlink(const char * oldname, const char * newname)
1564{
1565        int error = 0;
1566        char * from;
1567        char * to;
1568
1569        from = getname(oldname);
1570        if(IS_ERR(from))
1571                return PTR_ERR(from);
1572        to = getname(newname);
1573        error = PTR_ERR(to);
1574        if (!IS_ERR(to)) {
1575                struct dentry *dentry;
1576                struct nameidata nd;
1577
1578                error = path_lookup(to, LOOKUP_PARENT, &nd);
1579                if (error)
1580                        goto out;
1581                dentry = lookup_create(&nd, 0);
1582                error = PTR_ERR(dentry);
1583                if (!IS_ERR(dentry)) {
1584                        error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1585                        dput(dentry);
1586                }
1587                up(&nd.dentry->d_inode->i_sem);
1588                path_release(&nd);
1589out:
1590                putname(to);
1591        }
1592        putname(from);
1593        return error;
1594}
1595
1596int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1597{
1598        struct inode *inode;
1599        int error;
1600
1601        down(&dir->i_zombie);
1602        error = -ENOENT;
1603        inode = old_dentry->d_inode;
1604        if (!inode)
1605                goto exit_lock;
1606
1607        error = may_create(dir, new_dentry);
1608        if (error)
1609                goto exit_lock;
1610
1611        error = -EXDEV;
1612        if (dir->i_dev != inode->i_dev)
1613                goto exit_lock;
1614
1615        /*
1616         * A link to an append-only or immutable file cannot be created.
1617         */
1618        error = -EPERM;
1619        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1620                goto exit_lock;
1621        if (!dir->i_op || !dir->i_op->link)
1622                goto exit_lock;
1623
1624        DQUOT_INIT(dir);
1625        lock_kernel();
1626        error = dir->i_op->link(old_dentry, dir, new_dentry);
1627        unlock_kernel();
1628
1629exit_lock:
1630        up(&dir->i_zombie);
1631        if (!error)
1632                inode_dir_notify(dir, DN_CREATE);
1633        return error;
1634}
1635
1636/*
1637 * Hardlinks are often used in delicate situations.  We avoid
1638 * security-related surprises by not following symlinks on the
1639 * newname.  --KAB
1640 *
1641 * We don't follow them on the oldname either to be compatible
1642 * with linux 2.0, and to avoid hard-linking to directories
1643 * and other special files.  --ADM
1644 */
1645asmlinkage long sys_link(const char * oldname, const char * newname)
1646{
1647        int error;
1648        char * to;
1649
1650        to = getname(newname);
1651        error = PTR_ERR(to);
1652        if (!IS_ERR(to)) {
1653                struct dentry *new_dentry;
1654                struct nameidata nd, old_nd;
1655
1656                error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
1657                if (error)
1658                        goto exit;
1659                error = path_lookup(to, LOOKUP_PARENT, &nd);
1660                if (error)
1661                        goto out;
1662                error = -EXDEV;
1663                if (old_nd.mnt != nd.mnt)
1664                        goto out_release;
1665                new_dentry = lookup_create(&nd, 0);
1666                error = PTR_ERR(new_dentry);
1667                if (!IS_ERR(new_dentry)) {
1668                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1669                        dput(new_dentry);
1670                }
1671                up(&nd.dentry->d_inode->i_sem);
1672out_release:
1673                path_release(&nd);
1674out:
1675                path_release(&old_nd);
1676exit:
1677                putname(to);
1678        }
1679        return error;
1680}
1681
1682/*
1683 * The worst of all namespace operations - renaming directory. "Perverted"
1684 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1685 * Problems:
1686 *      a) we can get into loop creation. Check is done in is_subdir().
1687 *      b) race potential - two innocent renames can create a loop together.
1688 *         That's where 4.4 screws up. Current fix: serialization on
1689 *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1690 *         story.
1691 *      c) we have to lock _three_ objects - parents and victim (if it exists).
1692 *         And that - after we got ->i_sem on parents (until then we don't know
1693 *         whether the target exists at all, let alone whether it is a directory
1694 *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1695 *         on link creation/removal of any kind. And taken (without ->i_sem) on
1696 *         directory that will be removed (both in rmdir() and here).
1697 *      d) some filesystems don't support opened-but-unlinked directories,
1698 *         either because of layout or because they are not ready to deal with
1699 *         all cases correctly. The latter will be fixed (taking this sort of
1700 *         stuff into VFS), but the former is not going away. Solution: the same
1701 *         trick as in rmdir().
1702 *      e) conversion from fhandle to dentry may come in the wrong moment - when
1703 *         we are removing the target. Solution: we will have to grab ->i_zombie
1704 *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1705 *         ->i_sem on parents, which works but leads to some truly excessive
1706 *         locking].
1707 */
1708int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1709               struct inode *new_dir, struct dentry *new_dentry)
1710{
1711        int error;
1712        struct inode *target;
1713
1714        if (old_dentry->d_inode == new_dentry->d_inode)
1715                return 0;
1716
1717        error = may_delete(old_dir, old_dentry, 1);
1718        if (error)
1719                return error;
1720
1721        if (new_dir->i_dev != old_dir->i_dev)
1722                return -EXDEV;
1723
1724        if (!new_dentry->d_inode)
1725                error = may_create(new_dir, new_dentry);
1726        else
1727                error = may_delete(new_dir, new_dentry, 1);
1728        if (error)
1729                return error;
1730
1731        if (!old_dir->i_op || !old_dir->i_op->rename)
1732                return -EPERM;
1733
1734        /*
1735         * If we are going to change the parent - check write permissions,
1736         * we'll need to flip '..'.
1737         */
1738        if (new_dir != old_dir) {
1739                error = permission(old_dentry->d_inode, MAY_WRITE);
1740        }
1741        if (error)
1742                return error;
1743
1744        DQUOT_INIT(old_dir);
1745        DQUOT_INIT(new_dir);
1746        down(&old_dir->i_sb->s_vfs_rename_sem);
1747        error = -EINVAL;
1748        if (is_subdir(new_dentry, old_dentry))
1749                goto out_unlock;
1750        /* Don't eat your daddy, dear... */
1751        /* This also avoids locking issues */
1752        if (old_dentry->d_parent == new_dentry)
1753                goto out_unlock;
1754        target = new_dentry->d_inode;
1755        if (target) { /* Hastur! Hastur! Hastur! */
1756                triple_down(&old_dir->i_zombie,
1757                            &new_dir->i_zombie,
1758                            &target->i_zombie);
1759                d_unhash(new_dentry);
1760        } else
1761                double_down(&old_dir->i_zombie,
1762                            &new_dir->i_zombie);
1763        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1764                error = -EBUSY;
1765        else 
1766                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1767        if (target) {
1768                if (!error)
1769                        target->i_flags |= S_DEAD;
1770                triple_up(&old_dir->i_zombie,
1771                          &new_dir->i_zombie,
1772                          &target->i_zombie);
1773                if (d_unhashed(new_dentry))
1774                        d_rehash(new_dentry);
1775                dput(new_dentry);
1776        } else
1777                double_up(&old_dir->i_zombie,
1778                          &new_dir->i_zombie);
1779                
1780        if (!error)
1781                d_move(old_dentry,new_dentry);
1782out_unlock:
1783        up(&old_dir->i_sb->s_vfs_rename_sem);
1784        return error;
1785}
1786
1787int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1788               struct inode *new_dir, struct dentry *new_dentry)
1789{
1790        int error;
1791
1792        if (old_dentry->d_inode == new_dentry->d_inode)
1793                return 0;
1794
1795        error = may_delete(old_dir, old_dentry, 0);
1796        if (error)
1797                return error;
1798
1799        if (new_dir->i_dev != old_dir->i_dev)
1800                return -EXDEV;
1801
1802        if (!new_dentry->d_inode)
1803                error = may_create(new_dir, new_dentry);
1804        else
1805                error = may_delete(new_dir, new_dentry, 0);
1806        if (error)
1807                return error;
1808
1809        if (!old_dir->i_op || !old_dir->i_op->rename)
1810                return -EPERM;
1811
1812        DQUOT_INIT(old_dir);
1813        DQUOT_INIT(new_dir);
1814        double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1815        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1816                error = -EBUSY;
1817        else
1818                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1819        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1820        if (error)
1821                return error;
1822        /* The following d_move() should become unconditional */
1823        if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1824                d_move(old_dentry, new_dentry);
1825        }
1826        return 0;
1827}
1828
1829int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1830               struct inode *new_dir, struct dentry *new_dentry)
1831{
1832        int error;
1833        if (S_ISDIR(old_dentry->d_inode->i_mode))
1834                error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1835        else
1836                error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1837        if (!error) {
1838                if (old_dir == new_dir)
1839                        inode_dir_notify(old_dir, DN_RENAME);
1840                else {
1841                        inode_dir_notify(old_dir, DN_DELETE);
1842                        inode_dir_notify(new_dir, DN_CREATE);
1843                }
1844        }
1845        return error;
1846}
1847
1848static inline int do_rename(const char * oldname, const char * newname)
1849{
1850        int error = 0;
1851        struct dentry * old_dir, * new_dir;
1852        struct dentry * old_dentry, *new_dentry;
1853        struct nameidata oldnd, newnd;
1854
1855        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1856        if (error)
1857                goto exit;
1858
1859        error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1860        if (error)
1861                goto exit1;
1862
1863        error = -EXDEV;
1864        if (oldnd.mnt != newnd.mnt)
1865                goto exit2;
1866
1867        old_dir = oldnd.dentry;
1868        error = -EBUSY;
1869        if (oldnd.last_type != LAST_NORM)
1870                goto exit2;
1871
1872        new_dir = newnd.dentry;
1873        if (newnd.last_type != LAST_NORM)
1874                goto exit2;
1875
1876        double_lock(new_dir, old_dir);
1877
1878        old_dentry = lookup_hash(&oldnd.last, old_dir);
1879        error = PTR_ERR(old_dentry);
1880        if (IS_ERR(old_dentry))
1881                goto exit3;
1882        /* source must exist */
1883        error = -ENOENT;
1884        if (!old_dentry->d_inode)
1885                goto exit4;
1886        /* unless the source is a directory trailing slashes give -ENOTDIR */
1887        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1888                error = -ENOTDIR;
1889                if (oldnd.last.name[oldnd.last.len])
1890                        goto exit4;
1891                if (newnd.last.name[newnd.last.len])
1892                        goto exit4;
1893        }
1894        new_dentry = lookup_hash(&newnd.last, new_dir);
1895        error = PTR_ERR(new_dentry);
1896        if (IS_ERR(new_dentry))
1897                goto exit4;
1898
1899        lock_kernel();
1900        error = vfs_rename(old_dir->d_inode, old_dentry,
1901                                   new_dir->d_inode, new_dentry);
1902        unlock_kernel();
1903
1904        dput(new_dentry);
1905exit4:
1906        dput(old_dentry);
1907exit3:
1908        double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1909exit2:
1910        path_release(&newnd);
1911exit1:
1912        path_release(&oldnd);
1913exit:
1914        return error;
1915}
1916
1917asmlinkage long sys_rename(const char * oldname, const char * newname)
1918{
1919        int error;
1920        char * from;
1921        char * to;
1922
1923        from = getname(oldname);
1924        if(IS_ERR(from))
1925                return PTR_ERR(from);
1926        to = getname(newname);
1927        error = PTR_ERR(to);
1928        if (!IS_ERR(to)) {
1929                error = do_rename(from,to);
1930                putname(to);
1931        }
1932        putname(from);
1933        return error;
1934}
1935
1936int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1937{
1938        int len;
1939
1940        len = PTR_ERR(link);
1941        if (IS_ERR(link))
1942                goto out;
1943
1944        len = strlen(link);
1945        if (len > (unsigned) buflen)
1946                len = buflen;
1947        if (copy_to_user(buffer, link, len))
1948                len = -EFAULT;
1949out:
1950        return len;
1951}
1952
1953static inline int
1954__vfs_follow_link(struct nameidata *nd, const char *link)
1955{
1956        int res = 0;
1957        char *name;
1958        if (IS_ERR(link))
1959                goto fail;
1960
1961        if (*link == '/') {
1962                path_release(nd);
1963                if (!walk_init_root(link, nd))
1964                        /* weird __emul_prefix() stuff did it */
1965                        goto out;
1966        }
1967        res = link_path_walk(link, nd);
1968out:
1969        if (current->link_count || res || nd->last_type!=LAST_NORM)
1970                return res;
1971        /*
1972         * If it is an iterative symlinks resolution in open_namei() we
1973         * have to copy the last component. And all that crap because of
1974         * bloody create() on broken symlinks. Furrfu...
1975         */
1976        name = __getname();
1977        if (!name)
1978                return -ENOMEM;
1979        strcpy(name, nd->last.name);
1980        nd->last.name = name;
1981        return 0;
1982fail:
1983        path_release(nd);
1984        return PTR_ERR(link);
1985}
1986
/*
 * Out-of-line entry point for __vfs_follow_link(): follow the symlink
 * body @link starting from @nd and return that helper's result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res;

	res = __vfs_follow_link(nd, link);
	return res;
}
1991
1992/* get the link contents into pagecache */
1993static char *page_getlink(struct dentry * dentry, struct page **ppage)
1994{
1995        struct page * page;
1996        struct address_space *mapping = dentry->d_inode->i_mapping;
1997        page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1998                                NULL);
1999        if (IS_ERR(page))
2000                goto sync_fail;
2001        wait_on_page(page);
2002        if (!Page_Uptodate(page))
2003                goto async_fail;
2004        *ppage = page;
2005        return kmap(page);
2006
2007async_fail:
2008        page_cache_release(page);
2009        return ERR_PTR(-EIO);
2010
2011sync_fail:
2012        return (char*)page;
2013}
2014
2015int page_readlink(struct dentry *dentry, char *buffer, int buflen)
2016{
2017        struct page *page = NULL;
2018        char *s = page_getlink(dentry, &page);
2019        int res = vfs_readlink(dentry,buffer,buflen,s);
2020        if (page) {
2021                kunmap(page);
2022                page_cache_release(page);
2023        }
2024        return res;
2025}
2026
2027int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2028{
2029        struct page *page = NULL;
2030        char *s = page_getlink(dentry, &page);
2031        int res = __vfs_follow_link(nd, s);
2032        if (page) {
2033                kunmap(page);
2034                page_cache_release(page);
2035        }
2036        return res;
2037}
2038
2039struct inode_operations page_symlink_inode_operations = {
2040        readlink:       page_readlink,
2041        follow_link:    page_follow_link,
2042};
2043
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.