linux-old/fs/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/namei.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 * Some corrections by tytso.
   9 */
  10
  11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12 * lookup logic.
  13 */
  14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15 */
  16
  17#include <linux/init.h>
  18#include <linux/slab.h>
  19#include <linux/fs.h>
  20#include <linux/quotaops.h>
  21#include <linux/pagemap.h>
  22#include <linux/dnotify.h>
  23#include <linux/smp_lock.h>
  24#include <linux/personality.h>
  25
  26#include <asm/namei.h>
  27#include <asm/uaccess.h>
  28
  29#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  30
  31/* [Feb-1997 T. Schoebel-Theuer]
  32 * Fundamental changes in the pathname lookup mechanisms (namei)
  33 * were necessary because of omirr.  The reason is that omirr needs
  34 * to know the _real_ pathname, not the user-supplied one, in case
  35 * of symlinks (and also when transname replacements occur).
  36 *
  37 * The new code replaces the old recursive symlink resolution with
  38 * an iterative one (in case of non-nested symlink chains).  It does
  39 * this with calls to <fs>_follow_link().
  40 * As a side effect, dir_namei(), _namei() and follow_link() are now 
  41 * replaced with a single function lookup_dentry() that can handle all 
  42 * the special cases of the former code.
  43 *
  44 * With the new dcache, the pathname is stored at each inode, at least as
  45 * long as the refcount of the inode is positive.  As a side effect, the
  46 * size of the dcache depends on the inode cache and thus is dynamic.
  47 *
  48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  49 * resolution to correspond with current state of the code.
  50 *
  51 * Note that the symlink resolution is not *completely* iterative.
  52 * There is still a significant amount of tail- and mid- recursion in
  53 * the algorithm.  Also, note that <fs>_readlink() is not used in
  54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  55 * may return different results than <fs>_follow_link().  Many virtual
  56 * filesystems (including /proc) exhibit this behavior.
  57 */
  58
/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in the form of a symlink, try to create the
 * new name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics also affect mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */
  75
  76/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  77 * semantics.  See the comments in "open_namei" and "do_link" below.
  78 *
  79 * [10-Sep-98 Alan Modra] Another symlink change.
  80 */
  81
  82/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  83 *      inside the path - always follow.
  84 *      in the last component in creation/removal/renaming - never follow.
  85 *      if LOOKUP_FOLLOW passed - follow.
  86 *      if the pathname has trailing slashes - follow.
  87 *      otherwise - don't follow.
  88 * (applied in that order).
  89 *
  90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
  91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
  92 * During the 2.4 we need to fix the userland stuff depending on it -
  93 * hopefully we will be able to get rid of that wart in 2.5. So far only
  94 * XEmacs seems to be relying on it...
  95 */
  96
  97/* In order to reduce some races, while at the same time doing additional
  98 * checking and hopefully speeding things up, we copy filenames to the
  99 * kernel data space before using them..
 100 *
 101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 102 * PATH_MAX includes the nul terminator --RR.
 103 */
 104static inline int do_getname(const char *filename, char *page)
 105{
 106        int retval;
 107        unsigned long len = PATH_MAX;
 108
 109        if ((unsigned long) filename >= TASK_SIZE) {
 110                if (!segment_eq(get_fs(), KERNEL_DS))
 111                        return -EFAULT;
 112        } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
 113                len = TASK_SIZE - (unsigned long) filename;
 114
 115        retval = strncpy_from_user((char *)page, filename, len);
 116        if (retval > 0) {
 117                if (retval < len)
 118                        return 0;
 119                return -ENAMETOOLONG;
 120        } else if (!retval)
 121                retval = -ENOENT;
 122        return retval;
 123}
 124
 125char * getname(const char * filename)
 126{
 127        char *tmp, *result;
 128
 129        result = ERR_PTR(-ENOMEM);
 130        tmp = __getname();
 131        if (tmp)  {
 132                int retval = do_getname(filename, tmp);
 133
 134                result = tmp;
 135                if (retval < 0) {
 136                        putname(tmp);
 137                        result = ERR_PTR(retval);
 138                }
 139        }
 140        return result;
 141}
 142
 143/*
 144 *      vfs_permission()
 145 *
 146 * is used to check for read/write/execute permissions on a file.
 147 * We use "fsuid" for this, letting us set arbitrary permissions
 148 * for filesystem access without changing the "normal" uids which
 149 * are used for other things..
 150 */
 151int vfs_permission(struct inode * inode, int mask)
 152{
 153        umode_t                 mode = inode->i_mode;
 154
 155        if (mask & MAY_WRITE) {
 156                /*
 157                 * Nobody gets write access to a read-only fs.
 158                 */
 159                if (IS_RDONLY(inode) &&
 160                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 161                        return -EROFS;
 162
 163                /*
 164                 * Nobody gets write access to an immutable file.
 165                 */
 166                if (IS_IMMUTABLE(inode))
 167                        return -EACCES;
 168        }
 169
 170        if (current->fsuid == inode->i_uid)
 171                mode >>= 6;
 172        else if (in_group_p(inode->i_gid))
 173                mode >>= 3;
 174
 175        /*
 176         * If the DACs are ok we don't need any capability check.
 177         */
 178        if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
 179                return 0;
 180
 181        /*
 182         * Read/write DACs are always overridable.
 183         * Executable DACs are overridable if at least one exec bit is set.
 184         */
 185        if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
 186                if (capable(CAP_DAC_OVERRIDE))
 187                        return 0;
 188
 189        /*
 190         * Searching includes executable on directories, else just read.
 191         */
 192        if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 193                if (capable(CAP_DAC_READ_SEARCH))
 194                        return 0;
 195
 196        return -EACCES;
 197}
 198
 199int permission(struct inode * inode,int mask)
 200{
 201        if (inode->i_op && inode->i_op->permission) {
 202                int retval;
 203                lock_kernel();
 204                retval = inode->i_op->permission(inode, mask);
 205                unlock_kernel();
 206                return retval;
 207        }
 208        return vfs_permission(inode, mask);
 209}
 210
 211/*
 212 * get_write_access() gets write permission for a file.
 213 * put_write_access() releases this write permission.
 214 * This is used for regular files.
 215 * We cannot support write (and maybe mmap read-write shared) accesses and
 216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 217 * can have the following values:
 218 * 0: no writers, no VM_DENYWRITE mappings
 219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 220 * > 0: (i_writecount) users are writing to the file.
 221 *
 222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
 223 * except for the cases where we don't hold i_writecount yet. Then we need to
 224 * use {get,deny}_write_access() - these functions check the sign and refuse
 225 * to do the change if sign is wrong. Exclusion between them is provided by
 226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
 227 * who will try to move it in struct inode - just leave it here.
 228 */
 229static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
 230int get_write_access(struct inode * inode)
 231{
 232        spin_lock(&arbitration_lock);
 233        if (atomic_read(&inode->i_writecount) < 0) {
 234                spin_unlock(&arbitration_lock);
 235                return -ETXTBSY;
 236        }
 237        atomic_inc(&inode->i_writecount);
 238        spin_unlock(&arbitration_lock);
 239        return 0;
 240}
 241int deny_write_access(struct file * file)
 242{
 243        spin_lock(&arbitration_lock);
 244        if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
 245                spin_unlock(&arbitration_lock);
 246                return -ETXTBSY;
 247        }
 248        atomic_dec(&file->f_dentry->d_inode->i_writecount);
 249        spin_unlock(&arbitration_lock);
 250        return 0;
 251}
 252
 253void path_release(struct nameidata *nd)
 254{
 255        dput(nd->dentry);
 256        mntput(nd->mnt);
 257}
 258
 259/*
 260 * Internal lookup() using the new generic dcache.
 261 * SMP-safe
 262 */
 263static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 264{
 265        struct dentry * dentry = d_lookup(parent, name);
 266
 267        if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 268                if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 269                        dput(dentry);
 270                        dentry = NULL;
 271                }
 272        }
 273        return dentry;
 274}
 275
 276/*
 277 * This is called when everything else fails, and we actually have
 278 * to go to the low-level filesystem to find out what we should do..
 279 *
 280 * We get the directory semaphore, and after getting that we also
 281 * make sure that nobody added the entry to the dcache in the meantime..
 282 * SMP-safe
 283 */
 284static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 285{
 286        struct dentry * result;
 287        struct inode *dir = parent->d_inode;
 288
 289        down(&dir->i_sem);
 290        /*
 291         * First re-do the cached lookup just in case it was created
 292         * while we waited for the directory semaphore..
 293         *
 294         * FIXME! This could use version numbering or similar to
 295         * avoid unnecessary cache lookups.
 296         */
 297        result = d_lookup(parent, name);
 298        if (!result) {
 299                struct dentry * dentry = d_alloc(parent, name);
 300                result = ERR_PTR(-ENOMEM);
 301                if (dentry) {
 302                        lock_kernel();
 303                        result = dir->i_op->lookup(dir, dentry);
 304                        unlock_kernel();
 305                        if (result)
 306                                dput(dentry);
 307                        else
 308                                result = dentry;
 309                }
 310                up(&dir->i_sem);
 311                return result;
 312        }
 313
 314        /*
 315         * Uhhuh! Nasty case: the cache was re-populated while
 316         * we waited on the semaphore. Need to revalidate.
 317         */
 318        up(&dir->i_sem);
 319        if (result->d_op && result->d_op->d_revalidate) {
 320                if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
 321                        dput(result);
 322                        result = ERR_PTR(-ENOENT);
 323                }
 324        }
 325        return result;
 326}
 327
 328/*
 329 * This limits recursive symlink follows to 8, while
 330 * limiting consecutive symlinks to 40.
 331 *
 332 * Without that kind of total limit, nasty chains of consecutive
 333 * symlinks can cause almost arbitrarily long lookups. 
 334 */
 335static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 336{
 337        int err;
 338        if (current->link_count >= 5)
 339                goto loop;
 340        if (current->total_link_count >= 40)
 341                goto loop;
 342        if (current->need_resched) {
 343                current->state = TASK_RUNNING;
 344                schedule();
 345        }
 346        current->link_count++;
 347        current->total_link_count++;
 348        UPDATE_ATIME(dentry->d_inode);
 349        err = dentry->d_inode->i_op->follow_link(dentry, nd);
 350        current->link_count--;
 351        return err;
 352loop:
 353        path_release(nd);
 354        return -ELOOP;
 355}
 356
 357static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
 358{
 359        struct vfsmount *parent;
 360        struct dentry *dentry;
 361        spin_lock(&dcache_lock);
 362        parent=(*mnt)->mnt_parent;
 363        if (parent == *mnt) {
 364                spin_unlock(&dcache_lock);
 365                return 0;
 366        }
 367        mntget(parent);
 368        dentry=dget((*mnt)->mnt_mountpoint);
 369        spin_unlock(&dcache_lock);
 370        dput(*base);
 371        *base = dentry;
 372        mntput(*mnt);
 373        *mnt = parent;
 374        return 1;
 375}
 376
 377int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 378{
 379        return __follow_up(mnt, dentry);
 380}
 381
 382static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 383{
 384        struct vfsmount *mounted;
 385
 386        spin_lock(&dcache_lock);
 387        mounted = lookup_mnt(*mnt, *dentry);
 388        if (mounted) {
 389                *mnt = mntget(mounted);
 390                spin_unlock(&dcache_lock);
 391                dput(*dentry);
 392                mntput(mounted->mnt_parent);
 393                *dentry = dget(mounted->mnt_root);
 394                return 1;
 395        }
 396        spin_unlock(&dcache_lock);
 397        return 0;
 398}
 399
 400int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 401{
 402        return __follow_down(mnt,dentry);
 403}
 404 
 405static inline void follow_dotdot(struct nameidata *nd)
 406{
 407        while(1) {
 408                struct vfsmount *parent;
 409                struct dentry *dentry;
 410                read_lock(&current->fs->lock);
 411                if (nd->dentry == current->fs->root &&
 412                    nd->mnt == current->fs->rootmnt)  {
 413                        read_unlock(&current->fs->lock);
 414                        break;
 415                }
 416                read_unlock(&current->fs->lock);
 417                spin_lock(&dcache_lock);
 418                if (nd->dentry != nd->mnt->mnt_root) {
 419                        dentry = dget(nd->dentry->d_parent);
 420                        spin_unlock(&dcache_lock);
 421                        dput(nd->dentry);
 422                        nd->dentry = dentry;
 423                        break;
 424                }
 425                parent=nd->mnt->mnt_parent;
 426                if (parent == nd->mnt) {
 427                        spin_unlock(&dcache_lock);
 428                        break;
 429                }
 430                mntget(parent);
 431                dentry=dget(nd->mnt->mnt_mountpoint);
 432                spin_unlock(&dcache_lock);
 433                dput(nd->dentry);
 434                nd->dentry = dentry;
 435                mntput(nd->mnt);
 436                nd->mnt = parent;
 437        }
 438        while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
 439                ;
 440}
 441
 442/*
 443 * Name resolution.
 444 *
 445 * This is the basic name resolution function, turning a pathname
 446 * into the final dentry.
 447 *
 448 * We expect 'base' to be positive and a directory.
 449 */
 450int link_path_walk(const char * name, struct nameidata *nd)
 451{
 452        struct dentry *dentry;
 453        struct inode *inode;
 454        int err;
 455        unsigned int lookup_flags = nd->flags;
 456
 457        while (*name=='/')
 458                name++;
 459        if (!*name)
 460                goto return_reval;
 461
 462        inode = nd->dentry->d_inode;
 463        if (current->link_count)
 464                lookup_flags = LOOKUP_FOLLOW;
 465
 466        /* At this point we know we have a real path component. */
 467        for(;;) {
 468                unsigned long hash;
 469                struct qstr this;
 470                unsigned int c;
 471
 472                err = permission(inode, MAY_EXEC);
 473                dentry = ERR_PTR(err);
 474                if (err)
 475                        break;
 476
 477                this.name = name;
 478                c = *(const unsigned char *)name;
 479
 480                hash = init_name_hash();
 481                do {
 482                        name++;
 483                        hash = partial_name_hash(c, hash);
 484                        c = *(const unsigned char *)name;
 485                } while (c && (c != '/'));
 486                this.len = name - (const char *) this.name;
 487                this.hash = end_name_hash(hash);
 488
 489                /* remove trailing slashes? */
 490                if (!c)
 491                        goto last_component;
 492                while (*++name == '/');
 493                if (!*name)
 494                        goto last_with_slashes;
 495
 496                /*
 497                 * "." and ".." are special - ".." especially so because it has
 498                 * to be able to know about the current root directory and
 499                 * parent relationships.
 500                 */
 501                if (this.name[0] == '.') switch (this.len) {
 502                        default:
 503                                break;
 504                        case 2: 
 505                                if (this.name[1] != '.')
 506                                        break;
 507                                follow_dotdot(nd);
 508                                inode = nd->dentry->d_inode;
 509                                /* fallthrough */
 510                        case 1:
 511                                continue;
 512                }
 513                /*
 514                 * See if the low-level filesystem might want
 515                 * to use its own hash..
 516                 */
 517                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 518                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 519                        if (err < 0)
 520                                break;
 521                }
 522                /* This does the actual lookups.. */
 523                dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 524                if (!dentry) {
 525                        dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 526                        err = PTR_ERR(dentry);
 527                        if (IS_ERR(dentry))
 528                                break;
 529                }
 530                /* Check mountpoints.. */
 531                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 532                        ;
 533
 534                err = -ENOENT;
 535                inode = dentry->d_inode;
 536                if (!inode)
 537                        goto out_dput;
 538                err = -ENOTDIR; 
 539                if (!inode->i_op)
 540                        goto out_dput;
 541
 542                if (inode->i_op->follow_link) {
 543                        err = do_follow_link(dentry, nd);
 544                        dput(dentry);
 545                        if (err)
 546                                goto return_err;
 547                        err = -ENOENT;
 548                        inode = nd->dentry->d_inode;
 549                        if (!inode)
 550                                break;
 551                        err = -ENOTDIR; 
 552                        if (!inode->i_op)
 553                                break;
 554                } else {
 555                        dput(nd->dentry);
 556                        nd->dentry = dentry;
 557                }
 558                err = -ENOTDIR; 
 559                if (!inode->i_op->lookup)
 560                        break;
 561                continue;
 562                /* here ends the main loop */
 563
 564last_with_slashes:
 565                lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 566last_component:
 567                if (lookup_flags & LOOKUP_PARENT)
 568                        goto lookup_parent;
 569                if (this.name[0] == '.') switch (this.len) {
 570                        default:
 571                                break;
 572                        case 2: 
 573                                if (this.name[1] != '.')
 574                                        break;
 575                                follow_dotdot(nd);
 576                                inode = nd->dentry->d_inode;
 577                                /* fallthrough */
 578                        case 1:
 579                                goto return_reval;
 580                }
 581                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 582                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 583                        if (err < 0)
 584                                break;
 585                }
 586                dentry = cached_lookup(nd->dentry, &this, 0);
 587                if (!dentry) {
 588                        dentry = real_lookup(nd->dentry, &this, 0);
 589                        err = PTR_ERR(dentry);
 590                        if (IS_ERR(dentry))
 591                                break;
 592                }
 593                while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 594                        ;
 595                inode = dentry->d_inode;
 596                if ((lookup_flags & LOOKUP_FOLLOW)
 597                    && inode && inode->i_op && inode->i_op->follow_link) {
 598                        err = do_follow_link(dentry, nd);
 599                        dput(dentry);
 600                        if (err)
 601                                goto return_err;
 602                        inode = nd->dentry->d_inode;
 603                } else {
 604                        dput(nd->dentry);
 605                        nd->dentry = dentry;
 606                }
 607                err = -ENOENT;
 608                if (!inode)
 609                        goto no_inode;
 610                if (lookup_flags & LOOKUP_DIRECTORY) {
 611                        err = -ENOTDIR; 
 612                        if (!inode->i_op || !inode->i_op->lookup)
 613                                break;
 614                }
 615                goto return_base;
 616no_inode:
 617                err = -ENOENT;
 618                if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
 619                        break;
 620                goto return_base;
 621lookup_parent:
 622                nd->last = this;
 623                nd->last_type = LAST_NORM;
 624                if (this.name[0] != '.')
 625                        goto return_base;
 626                if (this.len == 1)
 627                        nd->last_type = LAST_DOT;
 628                else if (this.len == 2 && this.name[1] == '.')
 629                        nd->last_type = LAST_DOTDOT;
 630                else
 631                        goto return_base;
 632return_reval:
 633                /*
 634                 * We bypassed the ordinary revalidation routines.
 635                 * Check the cached dentry for staleness.
 636                 */
 637                dentry = nd->dentry;
 638                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 639                        err = -ESTALE;
 640                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
 641                                d_invalidate(dentry);
 642                                break;
 643                        }
 644                }
 645return_base:
 646                return 0;
 647out_dput:
 648                dput(dentry);
 649                break;
 650        }
 651        path_release(nd);
 652return_err:
 653        return err;
 654}
 655
 656int path_walk(const char * name, struct nameidata *nd)
 657{
 658        current->total_link_count = 0;
 659        return link_path_walk(name, nd);
 660}
 661
 662/* SMP-safe */
 663/* returns 1 if everything is done */
 664static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 665{
 666        if (path_walk(name, nd))
 667                return 0;               /* something went wrong... */
 668
 669        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
 670                struct nameidata nd_root;
 671                /*
 672                 * NAME was not found in alternate root or it's a directory.  Try to find
 673                 * it in the normal root:
 674                 */
 675                nd_root.last_type = LAST_ROOT;
 676                nd_root.flags = nd->flags;
 677                read_lock(&current->fs->lock);
 678                nd_root.mnt = mntget(current->fs->rootmnt);
 679                nd_root.dentry = dget(current->fs->root);
 680                read_unlock(&current->fs->lock);
 681                if (path_walk(name, &nd_root))
 682                        return 1;
 683                if (nd_root.dentry->d_inode) {
 684                        path_release(nd);
 685                        nd->dentry = nd_root.dentry;
 686                        nd->mnt = nd_root.mnt;
 687                        nd->last = nd_root.last;
 688                        return 1;
 689                }
 690                path_release(&nd_root);
 691        }
 692        return 1;
 693}
 694
 695void set_fs_altroot(void)
 696{
 697        char *emul = __emul_prefix();
 698        struct nameidata nd;
 699        struct vfsmount *mnt = NULL, *oldmnt;
 700        struct dentry *dentry = NULL, *olddentry;
 701        if (emul) {
 702                read_lock(&current->fs->lock);
 703                nd.mnt = mntget(current->fs->rootmnt);
 704                nd.dentry = dget(current->fs->root);
 705                read_unlock(&current->fs->lock);
 706                nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
 707                if (path_walk(emul,&nd) == 0) {
 708                        mnt = nd.mnt;
 709                        dentry = nd.dentry;
 710                }
 711        }
 712        write_lock(&current->fs->lock);
 713        oldmnt = current->fs->altrootmnt;
 714        olddentry = current->fs->altroot;
 715        current->fs->altrootmnt = mnt;
 716        current->fs->altroot = dentry;
 717        write_unlock(&current->fs->lock);
 718        if (olddentry) {
 719                dput(olddentry);
 720                mntput(oldmnt);
 721        }
 722}
 723
 724/* SMP-safe */
 725static inline int
 726walk_init_root(const char *name, struct nameidata *nd)
 727{
 728        read_lock(&current->fs->lock);
 729        if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 730                nd->mnt = mntget(current->fs->altrootmnt);
 731                nd->dentry = dget(current->fs->altroot);
 732                read_unlock(&current->fs->lock);
 733                if (__emul_lookup_dentry(name,nd))
 734                        return 0;
 735                read_lock(&current->fs->lock);
 736        }
 737        nd->mnt = mntget(current->fs->rootmnt);
 738        nd->dentry = dget(current->fs->root);
 739        read_unlock(&current->fs->lock);
 740        return 1;
 741}
 742
 743/* SMP-safe */
 744int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
 745{
 746        int error = 0;
 747        if (path_init(path, flags, nd))
 748                error = path_walk(path, nd);
 749        return error;
 750}
 751
 752
 753/* SMP-safe */
 754int path_init(const char *name, unsigned int flags, struct nameidata *nd)
 755{
 756        nd->last_type = LAST_ROOT; /* if there are only slashes... */
 757        nd->flags = flags;
 758        if (*name=='/')
 759                return walk_init_root(name,nd);
 760        read_lock(&current->fs->lock);
 761        nd->mnt = mntget(current->fs->pwdmnt);
 762        nd->dentry = dget(current->fs->pwd);
 763        read_unlock(&current->fs->lock);
 764        return 1;
 765}
 766
 767/*
 768 * Restricted form of lookup. Doesn't follow links, single-component only,
 769 * needs parent already locked. Doesn't follow mounts.
 770 * SMP-safe.
 771 */
 772struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 773{
 774        struct dentry * dentry;
 775        struct inode *inode;
 776        int err;
 777
 778        inode = base->d_inode;
 779        err = permission(inode, MAY_EXEC);
 780        dentry = ERR_PTR(err);
 781        if (err)
 782                goto out;
 783
 784        /*
 785         * See if the low-level filesystem might want
 786         * to use its own hash..
 787         */
 788        if (base->d_op && base->d_op->d_hash) {
 789                err = base->d_op->d_hash(base, name);
 790                dentry = ERR_PTR(err);
 791                if (err < 0)
 792                        goto out;
 793        }
 794
 795        dentry = cached_lookup(base, name, 0);
 796        if (!dentry) {
 797                struct dentry *new = d_alloc(base, name);
 798                dentry = ERR_PTR(-ENOMEM);
 799                if (!new)
 800                        goto out;
 801                lock_kernel();
 802                dentry = inode->i_op->lookup(inode, new);
 803                unlock_kernel();
 804                if (!dentry)
 805                        dentry = new;
 806                else
 807                        dput(new);
 808        }
 809out:
 810        return dentry;
 811}
 812
 813/* SMP-safe */
 814struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
 815{
 816        unsigned long hash;
 817        struct qstr this;
 818        unsigned int c;
 819
 820        this.name = name;
 821        this.len = len;
 822        if (!len)
 823                goto access;
 824
 825        hash = init_name_hash();
 826        while (len--) {
 827                c = *(const unsigned char *)name++;
 828                if (c == '/' || c == '\0')
 829                        goto access;
 830                hash = partial_name_hash(c, hash);
 831        }
 832        this.hash = end_name_hash(hash);
 833
 834        return lookup_hash(&this, base);
 835access:
 836        return ERR_PTR(-EACCES);
 837}
 838
 839/*
 840 *      namei()
 841 *
 842 * is used by most simple commands to get the inode of a specified name.
 843 * Open, link etc use their own routines, but this is enough for things
 844 * like 'chmod' etc.
 845 *
 846 * namei exists in two versions: namei/lnamei. The only difference is
 847 * that namei follows links, while lnamei does not.
 848 * SMP-safe
 849 */
 850int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
 851{
 852        char *tmp;
 853        int err;
 854
 855        tmp = getname(name);
 856        err = PTR_ERR(tmp);
 857        if (!IS_ERR(tmp)) {
 858                err = 0;
 859                err = path_lookup(tmp, flags, nd);
 860                putname(tmp);
 861        }
 862        return err;
 863}
 864
 865/*
 866 * It's inline, so penalty for filesystems that don't use sticky bit is
 867 * minimal.
 868 */
 869static inline int check_sticky(struct inode *dir, struct inode *inode)
 870{
 871        if (!(dir->i_mode & S_ISVTX))
 872                return 0;
 873        if (inode->i_uid == current->fsuid)
 874                return 0;
 875        if (dir->i_uid == current->fsuid)
 876                return 0;
 877        return !capable(CAP_FOWNER);
 878}
 879
 880/*
 881 *      Check whether we can remove a link victim from directory dir, check
 882 *  whether the type of victim is right.
 883 *  1. We can't do it if dir is read-only (done in permission())
 884 *  2. We should have write and exec permissions on dir
 885 *  3. We can't remove anything from append-only dir
 886 *  4. We can't do anything with immutable dir (done in permission())
 887 *  5. If the sticky bit on dir is set we should either
 888 *      a. be owner of dir, or
 889 *      b. be owner of victim, or
 890 *      c. have CAP_FOWNER capability
 891 *  6. If the victim is append-only or immutable we can't do antyhing with
 892 *     links pointing to it.
 893 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 894 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 895 *  9. We can't remove a root or mountpoint.
 896 */
 897static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 898{
 899        int error;
 900        if (!victim->d_inode || victim->d_parent->d_inode != dir)
 901                return -ENOENT;
 902        error = permission(dir,MAY_WRITE | MAY_EXEC);
 903        if (error)
 904                return error;
 905        if (IS_APPEND(dir))
 906                return -EPERM;
 907        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 908            IS_IMMUTABLE(victim->d_inode))
 909                return -EPERM;
 910        if (isdir) {
 911                if (!S_ISDIR(victim->d_inode->i_mode))
 912                        return -ENOTDIR;
 913                if (IS_ROOT(victim))
 914                        return -EBUSY;
 915        } else if (S_ISDIR(victim->d_inode->i_mode))
 916                return -EISDIR;
 917        if (IS_DEADDIR(dir))
 918                return -ENOENT;
 919        return 0;
 920}
 921
 922/*      Check whether we can create an object with dentry child in directory
 923 *  dir.
 924 *  1. We can't do it if child already exists (open has special treatment for
 925 *     this case, but since we are inlined it's OK)
 926 *  2. We can't do it if dir is read-only (done in permission())
 927 *  3. We should have write and exec permissions on dir
 928 *  4. We can't do it if dir is immutable (done in permission())
 929 */
 930static inline int may_create(struct inode *dir, struct dentry *child) {
 931        if (child->d_inode)
 932                return -EEXIST;
 933        if (IS_DEADDIR(dir))
 934                return -ENOENT;
 935        return permission(dir,MAY_WRITE | MAY_EXEC);
 936}
 937
 938/* 
 939 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 940 * reasons.
 941 *
 942 * O_DIRECTORY translates into forcing a directory lookup.
 943 */
 944static inline int lookup_flags(unsigned int f)
 945{
 946        unsigned long retval = LOOKUP_FOLLOW;
 947
 948        if (f & O_NOFOLLOW)
 949                retval &= ~LOOKUP_FOLLOW;
 950        
 951        if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 952                retval &= ~LOOKUP_FOLLOW;
 953        
 954        if (f & O_DIRECTORY)
 955                retval |= LOOKUP_DIRECTORY;
 956
 957        return retval;
 958}
 959
 960int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 961{
 962        int error;
 963
 964        mode &= S_IALLUGO;
 965        mode |= S_IFREG;
 966
 967        down(&dir->i_zombie);
 968        error = may_create(dir, dentry);
 969        if (error)
 970                goto exit_lock;
 971
 972        error = -EACCES;        /* shouldn't it be ENOSYS? */
 973        if (!dir->i_op || !dir->i_op->create)
 974                goto exit_lock;
 975
 976        DQUOT_INIT(dir);
 977        lock_kernel();
 978        error = dir->i_op->create(dir, dentry, mode);
 979        unlock_kernel();
 980exit_lock:
 981        up(&dir->i_zombie);
 982        if (!error)
 983                inode_dir_notify(dir, DN_CREATE);
 984        return error;
 985}
 986
 987/*
 988 *      open_namei()
 989 *
 990 * namei for open - this is in fact almost the whole open-routine.
 991 *
 992 * Note that the low bits of "flag" aren't the same as in the open
 993 * system call - they are 00 - no permissions needed
 994 *                        01 - read permission needed
 995 *                        10 - write permission needed
 996 *                        11 - read/write permissions needed
 997 * which is a lot more logical, and also allows the "no perm" needed
 998 * for symlinks (where the permissions are checked later).
 999 * SMP-safe
1000 */
1001int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1002{
1003        int acc_mode, error = 0;
1004        struct inode *inode;
1005        struct dentry *dentry;
1006        struct dentry *dir;
1007        int count = 0;
1008
1009        acc_mode = ACC_MODE(flag);
1010
1011        /*
1012         * The simplest case - just a plain lookup.
1013         */
1014        if (!(flag & O_CREAT)) {
1015                error = path_lookup(pathname, lookup_flags(flag), nd);
1016                if (error)
1017                        return error;
1018                dentry = nd->dentry;
1019                goto ok;
1020        }
1021
1022        /*
1023         * Create - we need to know the parent.
1024         */
1025        error = path_lookup(pathname, LOOKUP_PARENT, nd);
1026        if (error)
1027                return error;
1028
1029        /*
1030         * We have the parent and last component. First of all, check
1031         * that we are not asked to creat(2) an obvious directory - that
1032         * will not do.
1033         */
1034        error = -EISDIR;
1035        if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1036                goto exit;
1037
1038        dir = nd->dentry;
1039        down(&dir->d_inode->i_sem);
1040        dentry = lookup_hash(&nd->last, nd->dentry);
1041
1042do_last:
1043        error = PTR_ERR(dentry);
1044        if (IS_ERR(dentry)) {
1045                up(&dir->d_inode->i_sem);
1046                goto exit;
1047        }
1048
1049        /* Negative dentry, just create the file */
1050        if (!dentry->d_inode) {
1051                error = vfs_create(dir->d_inode, dentry,
1052                                   mode & ~current->fs->umask);
1053                up(&dir->d_inode->i_sem);
1054                dput(nd->dentry);
1055                nd->dentry = dentry;
1056                if (error)
1057                        goto exit;
1058                /* Don't check for write permission, don't truncate */
1059                acc_mode = 0;
1060                flag &= ~O_TRUNC;
1061                goto ok;
1062        }
1063
1064        /*
1065         * It already exists.
1066         */
1067        up(&dir->d_inode->i_sem);
1068
1069        error = -EEXIST;
1070        if (flag & O_EXCL)
1071                goto exit_dput;
1072
1073        if (d_mountpoint(dentry)) {
1074                error = -ELOOP;
1075                if (flag & O_NOFOLLOW)
1076                        goto exit_dput;
1077                while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1078        }
1079        error = -ENOENT;
1080        if (!dentry->d_inode)
1081                goto exit_dput;
1082        if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1083                goto do_link;
1084
1085        dput(nd->dentry);
1086        nd->dentry = dentry;
1087        error = -EISDIR;
1088        if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1089                goto exit;
1090ok:
1091        error = -ENOENT;
1092        inode = dentry->d_inode;
1093        if (!inode)
1094                goto exit;
1095
1096        error = -ELOOP;
1097        if (S_ISLNK(inode->i_mode))
1098                goto exit;
1099        
1100        error = -EISDIR;
1101        if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1102                goto exit;
1103
1104        error = permission(inode,acc_mode);
1105        if (error)
1106                goto exit;
1107
1108        /*
1109         * FIFO's, sockets and device files are special: they don't
1110         * actually live on the filesystem itself, and as such you
1111         * can write to them even if the filesystem is read-only.
1112         */
1113        if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1114                flag &= ~O_TRUNC;
1115        } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1116                error = -EACCES;
1117                if (nd->mnt->mnt_flags & MNT_NODEV)
1118                        goto exit;
1119
1120                flag &= ~O_TRUNC;
1121        } else {
1122                error = -EROFS;
1123                if (IS_RDONLY(inode) && (flag & 2))
1124                        goto exit;
1125        }
1126        /*
1127         * An append-only file must be opened in append mode for writing.
1128         */
1129        error = -EPERM;
1130        if (IS_APPEND(inode)) {
1131                if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1132                        goto exit;
1133                if (flag & O_TRUNC)
1134                        goto exit;
1135        }
1136
1137        /*
1138         * Ensure there are no outstanding leases on the file.
1139         */
1140        error = get_lease(inode, flag);
1141        if (error)
1142                goto exit;
1143
1144        if (flag & O_TRUNC) {
1145                error = get_write_access(inode);
1146                if (error)
1147                        goto exit;
1148
1149                /*
1150                 * Refuse to truncate files with mandatory locks held on them.
1151                 */
1152                error = locks_verify_locked(inode);
1153                if (!error) {
1154                        DQUOT_INIT(inode);
1155                        
1156                        error = do_truncate(dentry, 0);
1157                }
1158                put_write_access(inode);
1159                if (error)
1160                        goto exit;
1161        } else
1162                if (flag & FMODE_WRITE)
1163                        DQUOT_INIT(inode);
1164
1165        return 0;
1166
1167exit_dput:
1168        dput(dentry);
1169exit:
1170        path_release(nd);
1171        return error;
1172
1173do_link:
1174        error = -ELOOP;
1175        if (flag & O_NOFOLLOW)
1176                goto exit_dput;
1177        /*
1178         * This is subtle. Instead of calling do_follow_link() we do the
1179         * thing by hands. The reason is that this way we have zero link_count
1180         * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1181         * After that we have the parent and last component, i.e.
1182         * we are in the same situation as after the first path_walk().
1183         * Well, almost - if the last component is normal we get its copy
1184         * stored in nd->last.name and we will have to putname() it when we
1185         * are done. Procfs-like symlinks just set LAST_BIND.
1186         */
1187        UPDATE_ATIME(dentry->d_inode);
1188        error = dentry->d_inode->i_op->follow_link(dentry, nd);
1189        dput(dentry);
1190        if (error)
1191                return error;
1192        if (nd->last_type == LAST_BIND) {
1193                dentry = nd->dentry;
1194                goto ok;
1195        }
1196        error = -EISDIR;
1197        if (nd->last_type != LAST_NORM)
1198                goto exit;
1199        if (nd->last.name[nd->last.len]) {
1200                putname(nd->last.name);
1201                goto exit;
1202        }
1203        error = -ELOOP;
1204        if (count++==32) {
1205                putname(nd->last.name);
1206                goto exit;
1207        }
1208        dir = nd->dentry;
1209        down(&dir->d_inode->i_sem);
1210        dentry = lookup_hash(&nd->last, nd->dentry);
1211        putname(nd->last.name);
1212        goto do_last;
1213}
1214
1215/* SMP-safe */
1216static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1217{
1218        struct dentry *dentry;
1219
1220        down(&nd->dentry->d_inode->i_sem);
1221        dentry = ERR_PTR(-EEXIST);
1222        if (nd->last_type != LAST_NORM)
1223                goto fail;
1224        dentry = lookup_hash(&nd->last, nd->dentry);
1225        if (IS_ERR(dentry))
1226                goto fail;
1227        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1228                goto enoent;
1229        return dentry;
1230enoent:
1231        dput(dentry);
1232        dentry = ERR_PTR(-ENOENT);
1233fail:
1234        return dentry;
1235}
1236
1237int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1238{
1239        int error = -EPERM;
1240
1241        down(&dir->i_zombie);
1242        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1243                goto exit_lock;
1244
1245        error = may_create(dir, dentry);
1246        if (error)
1247                goto exit_lock;
1248
1249        error = -EPERM;
1250        if (!dir->i_op || !dir->i_op->mknod)
1251                goto exit_lock;
1252
1253        DQUOT_INIT(dir);
1254        lock_kernel();
1255        error = dir->i_op->mknod(dir, dentry, mode, dev);
1256        unlock_kernel();
1257exit_lock:
1258        up(&dir->i_zombie);
1259        if (!error)
1260                inode_dir_notify(dir, DN_CREATE);
1261        return error;
1262}
1263
1264asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1265{
1266        int error = 0;
1267        char * tmp;
1268        struct dentry * dentry;
1269        struct nameidata nd;
1270
1271        if (S_ISDIR(mode))
1272                return -EPERM;
1273        tmp = getname(filename);
1274        if (IS_ERR(tmp))
1275                return PTR_ERR(tmp);
1276
1277        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1278        if (error)
1279                goto out;
1280        dentry = lookup_create(&nd, 0);
1281        error = PTR_ERR(dentry);
1282
1283        mode &= ~current->fs->umask;
1284        if (!IS_ERR(dentry)) {
1285                switch (mode & S_IFMT) {
1286                case 0: case S_IFREG:
1287                        error = vfs_create(nd.dentry->d_inode,dentry,mode);
1288                        break;
1289                case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1290                        error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1291                        break;
1292                case S_IFDIR:
1293                        error = -EPERM;
1294                        break;
1295                default:
1296                        error = -EINVAL;
1297                }
1298                dput(dentry);
1299        }
1300        up(&nd.dentry->d_inode->i_sem);
1301        path_release(&nd);
1302out:
1303        putname(tmp);
1304
1305        return error;
1306}
1307
1308int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1309{
1310        int error;
1311
1312        down(&dir->i_zombie);
1313        error = may_create(dir, dentry);
1314        if (error)
1315                goto exit_lock;
1316
1317        error = -EPERM;
1318        if (!dir->i_op || !dir->i_op->mkdir)
1319                goto exit_lock;
1320
1321        DQUOT_INIT(dir);
1322        mode &= (S_IRWXUGO|S_ISVTX);
1323        lock_kernel();
1324        error = dir->i_op->mkdir(dir, dentry, mode);
1325        unlock_kernel();
1326
1327exit_lock:
1328        up(&dir->i_zombie);
1329        if (!error)
1330                inode_dir_notify(dir, DN_CREATE);
1331        return error;
1332}
1333
1334asmlinkage long sys_mkdir(const char * pathname, int mode)
1335{
1336        int error = 0;
1337        char * tmp;
1338
1339        tmp = getname(pathname);
1340        error = PTR_ERR(tmp);
1341        if (!IS_ERR(tmp)) {
1342                struct dentry *dentry;
1343                struct nameidata nd;
1344
1345                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1346                if (error)
1347                        goto out;
1348                dentry = lookup_create(&nd, 1);
1349                error = PTR_ERR(dentry);
1350                if (!IS_ERR(dentry)) {
1351                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
1352                                          mode & ~current->fs->umask);
1353                        dput(dentry);
1354                }
1355                up(&nd.dentry->d_inode->i_sem);
1356                path_release(&nd);
1357out:
1358                putname(tmp);
1359        }
1360
1361        return error;
1362}
1363
1364/*
1365 * We try to drop the dentry early: we should have
1366 * a usage count of 2 if we're the only user of this
1367 * dentry, and if that is true (possibly after pruning
1368 * the dcache), then we drop the dentry now.
1369 *
1370 * A low-level filesystem can, if it choses, legally
1371 * do a
1372 *
1373 *      if (!d_unhashed(dentry))
1374 *              return -EBUSY;
1375 *
1376 * if it cannot handle the case of removing a directory
1377 * that is still in use by something else..
1378 */
1379static void d_unhash(struct dentry *dentry)
1380{
1381        dget(dentry);
1382        spin_lock(&dcache_lock);
1383        switch (atomic_read(&dentry->d_count)) {
1384        default:
1385                spin_unlock(&dcache_lock);
1386                shrink_dcache_parent(dentry);
1387                spin_lock(&dcache_lock);
1388                if (atomic_read(&dentry->d_count) != 2)
1389                        break;
1390        case 2:
1391                list_del_init(&dentry->d_hash);
1392        }
1393        spin_unlock(&dcache_lock);
1394}
1395
1396int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1397{
1398        int error;
1399
1400        error = may_delete(dir, dentry, 1);
1401        if (error)
1402                return error;
1403
1404        if (!dir->i_op || !dir->i_op->rmdir)
1405                return -EPERM;
1406
1407        DQUOT_INIT(dir);
1408
1409        double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1410        d_unhash(dentry);
1411        if (d_mountpoint(dentry))
1412                error = -EBUSY;
1413        else {
1414                lock_kernel();
1415                error = dir->i_op->rmdir(dir, dentry);
1416                unlock_kernel();
1417                if (!error)
1418                        dentry->d_inode->i_flags |= S_DEAD;
1419        }
1420        double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1421        if (!error) {
1422                inode_dir_notify(dir, DN_DELETE);
1423                d_delete(dentry);
1424        }
1425        dput(dentry);
1426
1427        return error;
1428}
1429
1430asmlinkage long sys_rmdir(const char * pathname)
1431{
1432        int error = 0;
1433        char * name;
1434        struct dentry *dentry;
1435        struct nameidata nd;
1436
1437        name = getname(pathname);
1438        if(IS_ERR(name))
1439                return PTR_ERR(name);
1440
1441        error = path_lookup(name, LOOKUP_PARENT, &nd);
1442        if (error)
1443                goto exit;
1444
1445        switch(nd.last_type) {
1446                case LAST_DOTDOT:
1447                        error = -ENOTEMPTY;
1448                        goto exit1;
1449                case LAST_DOT:
1450                        error = -EINVAL;
1451                        goto exit1;
1452                case LAST_ROOT:
1453                        error = -EBUSY;
1454                        goto exit1;
1455        }
1456        down(&nd.dentry->d_inode->i_sem);
1457        dentry = lookup_hash(&nd.last, nd.dentry);
1458        error = PTR_ERR(dentry);
1459        if (!IS_ERR(dentry)) {
1460                error = vfs_rmdir(nd.dentry->d_inode, dentry);
1461                dput(dentry);
1462        }
1463        up(&nd.dentry->d_inode->i_sem);
1464exit1:
1465        path_release(&nd);
1466exit:
1467        putname(name);
1468        return error;
1469}
1470
1471int vfs_unlink(struct inode *dir, struct dentry *dentry)
1472{
1473        int error;
1474
1475        down(&dir->i_zombie);
1476        error = may_delete(dir, dentry, 0);
1477        if (!error) {
1478                error = -EPERM;
1479                if (dir->i_op && dir->i_op->unlink) {
1480                        DQUOT_INIT(dir);
1481                        if (d_mountpoint(dentry))
1482                                error = -EBUSY;
1483                        else {
1484                                lock_kernel();
1485                                error = dir->i_op->unlink(dir, dentry);
1486                                unlock_kernel();
1487                                if (!error)
1488                                        d_delete(dentry);
1489                        }
1490                }
1491        }
1492        up(&dir->i_zombie);
1493        if (!error)
1494                inode_dir_notify(dir, DN_DELETE);
1495        return error;
1496}
1497
1498asmlinkage long sys_unlink(const char * pathname)
1499{
1500        int error = 0;
1501        char * name;
1502        struct dentry *dentry;
1503        struct nameidata nd;
1504
1505        name = getname(pathname);
1506        if(IS_ERR(name))
1507                return PTR_ERR(name);
1508
1509        error = path_lookup(name, LOOKUP_PARENT, &nd);
1510        if (error)
1511                goto exit;
1512        error = -EISDIR;
1513        if (nd.last_type != LAST_NORM)
1514                goto exit1;
1515        down(&nd.dentry->d_inode->i_sem);
1516        dentry = lookup_hash(&nd.last, nd.dentry);
1517        error = PTR_ERR(dentry);
1518        if (!IS_ERR(dentry)) {
1519                /* Why not before? Because we want correct error value */
1520                if (nd.last.name[nd.last.len])
1521                        goto slashes;
1522                error = vfs_unlink(nd.dentry->d_inode, dentry);
1523        exit2:
1524                dput(dentry);
1525        }
1526        up(&nd.dentry->d_inode->i_sem);
1527exit1:
1528        path_release(&nd);
1529exit:
1530        putname(name);
1531
1532        return error;
1533
1534slashes:
1535        error = !dentry->d_inode ? -ENOENT :
1536                S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1537        goto exit2;
1538}
1539
1540int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1541{
1542        int error;
1543
1544        down(&dir->i_zombie);
1545        error = may_create(dir, dentry);
1546        if (error)
1547                goto exit_lock;
1548
1549        error = -EPERM;
1550        if (!dir->i_op || !dir->i_op->symlink)
1551                goto exit_lock;
1552
1553        DQUOT_INIT(dir);
1554        lock_kernel();
1555        error = dir->i_op->symlink(dir, dentry, oldname);
1556        unlock_kernel();
1557
1558exit_lock:
1559        up(&dir->i_zombie);
1560        if (!error)
1561                inode_dir_notify(dir, DN_CREATE);
1562        return error;
1563}
1564
1565asmlinkage long sys_symlink(const char * oldname, const char * newname)
1566{
1567        int error = 0;
1568        char * from;
1569        char * to;
1570
1571        from = getname(oldname);
1572        if(IS_ERR(from))
1573                return PTR_ERR(from);
1574        to = getname(newname);
1575        error = PTR_ERR(to);
1576        if (!IS_ERR(to)) {
1577                struct dentry *dentry;
1578                struct nameidata nd;
1579
1580                error = path_lookup(to, LOOKUP_PARENT, &nd);
1581                if (error)
1582                        goto out;
1583                dentry = lookup_create(&nd, 0);
1584                error = PTR_ERR(dentry);
1585                if (!IS_ERR(dentry)) {
1586                        error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1587                        dput(dentry);
1588                }
1589                up(&nd.dentry->d_inode->i_sem);
1590                path_release(&nd);
1591out:
1592                putname(to);
1593        }
1594        putname(from);
1595        return error;
1596}
1597
1598int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1599{
1600        struct inode *inode;
1601        int error;
1602
1603        down(&dir->i_zombie);
1604        error = -ENOENT;
1605        inode = old_dentry->d_inode;
1606        if (!inode)
1607                goto exit_lock;
1608
1609        error = may_create(dir, new_dentry);
1610        if (error)
1611                goto exit_lock;
1612
1613        error = -EXDEV;
1614        if (dir->i_dev != inode->i_dev)
1615                goto exit_lock;
1616
1617        /*
1618         * A link to an append-only or immutable file cannot be created.
1619         */
1620        error = -EPERM;
1621        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1622                goto exit_lock;
1623        if (!dir->i_op || !dir->i_op->link)
1624                goto exit_lock;
1625
1626        DQUOT_INIT(dir);
1627        lock_kernel();
1628        error = dir->i_op->link(old_dentry, dir, new_dentry);
1629        unlock_kernel();
1630
1631exit_lock:
1632        up(&dir->i_zombie);
1633        if (!error)
1634                inode_dir_notify(dir, DN_CREATE);
1635        return error;
1636}
1637
1638/*
1639 * Hardlinks are often used in delicate situations.  We avoid
1640 * security-related surprises by not following symlinks on the
1641 * newname.  --KAB
1642 *
1643 * We don't follow them on the oldname either to be compatible
1644 * with linux 2.0, and to avoid hard-linking to directories
1645 * and other special files.  --ADM
1646 */
1647asmlinkage long sys_link(const char * oldname, const char * newname)
1648{
1649        int error;
1650        char * to;
1651
1652        to = getname(newname);
1653        error = PTR_ERR(to);
1654        if (!IS_ERR(to)) {
1655                struct dentry *new_dentry;
1656                struct nameidata nd, old_nd;
1657
1658                error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
1659                if (error)
1660                        goto exit;
1661                error = path_lookup(to, LOOKUP_PARENT, &nd);
1662                if (error)
1663                        goto out;
1664                error = -EXDEV;
1665                if (old_nd.mnt != nd.mnt)
1666                        goto out_release;
1667                new_dentry = lookup_create(&nd, 0);
1668                error = PTR_ERR(new_dentry);
1669                if (!IS_ERR(new_dentry)) {
1670                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1671                        dput(new_dentry);
1672                }
1673                up(&nd.dentry->d_inode->i_sem);
1674out_release:
1675                path_release(&nd);
1676out:
1677                path_release(&old_nd);
1678exit:
1679                putname(to);
1680        }
1681        return error;
1682}
1683
1684/*
1685 * The worst of all namespace operations - renaming directory. "Perverted"
1686 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1687 * Problems:
1688 *      a) we can get into loop creation. Check is done in is_subdir().
1689 *      b) race potential - two innocent renames can create a loop together.
1690 *         That's where 4.4 screws up. Current fix: serialization on
1691 *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1692 *         story.
1693 *      c) we have to lock _three_ objects - parents and victim (if it exists).
1694 *         And that - after we got ->i_sem on parents (until then we don't know
1695 *         whether the target exists at all, let alone whether it is a directory
1696 *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1697 *         on link creation/removal of any kind. And taken (without ->i_sem) on
1698 *         directory that will be removed (both in rmdir() and here).
1699 *      d) some filesystems don't support opened-but-unlinked directories,
1700 *         either because of layout or because they are not ready to deal with
1701 *         all cases correctly. The latter will be fixed (taking this sort of
1702 *         stuff into VFS), but the former is not going away. Solution: the same
1703 *         trick as in rmdir().
1704 *      e) conversion from fhandle to dentry may come in the wrong moment - when
1705 *         we are removing the target. Solution: we will have to grab ->i_zombie
1706 *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1707 *         ->i_sem on parents, which works but leads to some truely excessive
1708 *         locking].
1709 */
1710int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1711               struct inode *new_dir, struct dentry *new_dentry)
1712{
1713        int error;
1714        struct inode *target;
1715
1716        if (old_dentry->d_inode == new_dentry->d_inode)
1717                return 0;
1718
1719        error = may_delete(old_dir, old_dentry, 1);
1720        if (error)
1721                return error;
1722
1723        if (new_dir->i_dev != old_dir->i_dev)
1724                return -EXDEV;
1725
1726        if (!new_dentry->d_inode)
1727                error = may_create(new_dir, new_dentry);
1728        else
1729                error = may_delete(new_dir, new_dentry, 1);
1730        if (error)
1731                return error;
1732
1733        if (!old_dir->i_op || !old_dir->i_op->rename)
1734                return -EPERM;
1735
1736        /*
1737         * If we are going to change the parent - check write permissions,
1738         * we'll need to flip '..'.
1739         */
1740        if (new_dir != old_dir) {
1741                error = permission(old_dentry->d_inode, MAY_WRITE);
1742        }
1743        if (error)
1744                return error;
1745
1746        DQUOT_INIT(old_dir);
1747        DQUOT_INIT(new_dir);
1748        down(&old_dir->i_sb->s_vfs_rename_sem);
1749        error = -EINVAL;
1750        if (is_subdir(new_dentry, old_dentry))
1751                goto out_unlock;
1752        /* Don't eat your daddy, dear... */
1753        /* This also avoids locking issues */
1754        if (old_dentry->d_parent == new_dentry)
1755                goto out_unlock;
1756        target = new_dentry->d_inode;
1757        if (target) { /* Hastur! Hastur! Hastur! */
1758                triple_down(&old_dir->i_zombie,
1759                            &new_dir->i_zombie,
1760                            &target->i_zombie);
1761                d_unhash(new_dentry);
1762        } else
1763                double_down(&old_dir->i_zombie,
1764                            &new_dir->i_zombie);
1765        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1766                error = -EBUSY;
1767        else 
1768                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1769        if (target) {
1770                if (!error)
1771                        target->i_flags |= S_DEAD;
1772                triple_up(&old_dir->i_zombie,
1773                          &new_dir->i_zombie,
1774                          &target->i_zombie);
1775                if (d_unhashed(new_dentry))
1776                        d_rehash(new_dentry);
1777                dput(new_dentry);
1778        } else
1779                double_up(&old_dir->i_zombie,
1780                          &new_dir->i_zombie);
1781                
1782        if (!error)
1783                d_move(old_dentry,new_dentry);
1784out_unlock:
1785        up(&old_dir->i_sb->s_vfs_rename_sem);
1786        return error;
1787}
1788
1789int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1790               struct inode *new_dir, struct dentry *new_dentry)
1791{
1792        int error;
1793
1794        if (old_dentry->d_inode == new_dentry->d_inode)
1795                return 0;
1796
1797        error = may_delete(old_dir, old_dentry, 0);
1798        if (error)
1799                return error;
1800
1801        if (new_dir->i_dev != old_dir->i_dev)
1802                return -EXDEV;
1803
1804        if (!new_dentry->d_inode)
1805                error = may_create(new_dir, new_dentry);
1806        else
1807                error = may_delete(new_dir, new_dentry, 0);
1808        if (error)
1809                return error;
1810
1811        if (!old_dir->i_op || !old_dir->i_op->rename)
1812                return -EPERM;
1813
1814        DQUOT_INIT(old_dir);
1815        DQUOT_INIT(new_dir);
1816        double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1817        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1818                error = -EBUSY;
1819        else
1820                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1821        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1822        if (error)
1823                return error;
1824        /* The following d_move() should become unconditional */
1825        if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1826                d_move(old_dentry, new_dentry);
1827        }
1828        return 0;
1829}
1830
1831int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1832               struct inode *new_dir, struct dentry *new_dentry)
1833{
1834        int error;
1835        if (S_ISDIR(old_dentry->d_inode->i_mode))
1836                error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1837        else
1838                error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1839        if (!error) {
1840                if (old_dir == new_dir)
1841                        inode_dir_notify(old_dir, DN_RENAME);
1842                else {
1843                        inode_dir_notify(old_dir, DN_DELETE);
1844                        inode_dir_notify(new_dir, DN_CREATE);
1845                }
1846        }
1847        return error;
1848}
1849
1850static inline int do_rename(const char * oldname, const char * newname)
1851{
1852        int error = 0;
1853        struct dentry * old_dir, * new_dir;
1854        struct dentry * old_dentry, *new_dentry;
1855        struct nameidata oldnd, newnd;
1856
1857        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1858        if (error)
1859                goto exit;
1860
1861        error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1862        if (error)
1863                goto exit1;
1864
1865        error = -EXDEV;
1866        if (oldnd.mnt != newnd.mnt)
1867                goto exit2;
1868
1869        old_dir = oldnd.dentry;
1870        error = -EBUSY;
1871        if (oldnd.last_type != LAST_NORM)
1872                goto exit2;
1873
1874        new_dir = newnd.dentry;
1875        if (newnd.last_type != LAST_NORM)
1876                goto exit2;
1877
1878        double_lock(new_dir, old_dir);
1879
1880        old_dentry = lookup_hash(&oldnd.last, old_dir);
1881        error = PTR_ERR(old_dentry);
1882        if (IS_ERR(old_dentry))
1883                goto exit3;
1884        /* source must exist */
1885        error = -ENOENT;
1886        if (!old_dentry->d_inode)
1887                goto exit4;
1888        /* unless the source is a directory trailing slashes give -ENOTDIR */
1889        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1890                error = -ENOTDIR;
1891                if (oldnd.last.name[oldnd.last.len])
1892                        goto exit4;
1893                if (newnd.last.name[newnd.last.len])
1894                        goto exit4;
1895        }
1896        new_dentry = lookup_hash(&newnd.last, new_dir);
1897        error = PTR_ERR(new_dentry);
1898        if (IS_ERR(new_dentry))
1899                goto exit4;
1900
1901        lock_kernel();
1902        error = vfs_rename(old_dir->d_inode, old_dentry,
1903                                   new_dir->d_inode, new_dentry);
1904        unlock_kernel();
1905
1906        dput(new_dentry);
1907exit4:
1908        dput(old_dentry);
1909exit3:
1910        double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1911exit2:
1912        path_release(&newnd);
1913exit1:
1914        path_release(&oldnd);
1915exit:
1916        return error;
1917}
1918
1919asmlinkage long sys_rename(const char * oldname, const char * newname)
1920{
1921        int error;
1922        char * from;
1923        char * to;
1924
1925        from = getname(oldname);
1926        if(IS_ERR(from))
1927                return PTR_ERR(from);
1928        to = getname(newname);
1929        error = PTR_ERR(to);
1930        if (!IS_ERR(to)) {
1931                error = do_rename(from,to);
1932                putname(to);
1933        }
1934        putname(from);
1935        return error;
1936}
1937
1938int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1939{
1940        int len;
1941
1942        len = PTR_ERR(link);
1943        if (IS_ERR(link))
1944                goto out;
1945
1946        len = strlen(link);
1947        if (len > (unsigned) buflen)
1948                len = buflen;
1949        if (copy_to_user(buffer, link, len))
1950                len = -EFAULT;
1951out:
1952        return len;
1953}
1954
1955static inline int
1956__vfs_follow_link(struct nameidata *nd, const char *link)
1957{
1958        int res = 0;
1959        char *name;
1960        if (IS_ERR(link))
1961                goto fail;
1962
1963        if (*link == '/') {
1964                path_release(nd);
1965                if (!walk_init_root(link, nd))
1966                        /* weird __emul_prefix() stuff did it */
1967                        goto out;
1968        }
1969        res = link_path_walk(link, nd);
1970out:
1971        if (current->link_count || res || nd->last_type!=LAST_NORM)
1972                return res;
1973        /*
1974         * If it is an iterative symlinks resolution in open_namei() we
1975         * have to copy the last component. And all that crap because of
1976         * bloody create() on broken symlinks. Furrfu...
1977         */
1978        name = __getname();
1979        if (!name) {
1980                path_release(nd);
1981                return -ENOMEM;
1982        }
1983        strcpy(name, nd->last.name);
1984        nd->last.name = name;
1985        return 0;
1986fail:
1987        path_release(nd);
1988        return PTR_ERR(link);
1989}
1990
/*
 * Out-of-line wrapper around the inline __vfs_follow_link(); takes the
 * same arguments and returns its result unchanged.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
        int res = __vfs_follow_link(nd, link);

        return res;
}
1995
1996/* get the link contents into pagecache */
1997static char *page_getlink(struct dentry * dentry, struct page **ppage)
1998{
1999        struct page * page;
2000        struct address_space *mapping = dentry->d_inode->i_mapping;
2001        page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2002                                NULL);
2003        if (IS_ERR(page))
2004                goto sync_fail;
2005        wait_on_page(page);
2006        if (!Page_Uptodate(page))
2007                goto async_fail;
2008        *ppage = page;
2009        return kmap(page);
2010
2011async_fail:
2012        page_cache_release(page);
2013        return ERR_PTR(-EIO);
2014
2015sync_fail:
2016        return (char*)page;
2017}
2018
2019int page_readlink(struct dentry *dentry, char *buffer, int buflen)
2020{
2021        struct page *page = NULL;
2022        char *s = page_getlink(dentry, &page);
2023        int res = vfs_readlink(dentry,buffer,buflen,s);
2024        if (page) {
2025                kunmap(page);
2026                page_cache_release(page);
2027        }
2028        return res;
2029}
2030
2031int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2032{
2033        struct page *page = NULL;
2034        char *s = page_getlink(dentry, &page);
2035        int res = __vfs_follow_link(nd, s);
2036        if (page) {
2037                kunmap(page);
2038                page_cache_release(page);
2039        }
2040        return res;
2041}
2042
2043struct inode_operations page_symlink_inode_operations = {
2044        readlink:       page_readlink,
2045        follow_link:    page_follow_link,
2046};
2047
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.