linux-old/fs/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/namei.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7/*
   8 * Some corrections by tytso.
   9 */
  10
  11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12 * lookup logic.
  13 */
  14
  15#include <linux/mm.h>
  16#include <linux/proc_fs.h>
  17#include <linux/smp_lock.h>
  18#include <linux/quotaops.h>
  19
  20#include <asm/uaccess.h>
  21#include <asm/unaligned.h>
  22#include <asm/semaphore.h>
  23#include <asm/page.h>
  24#include <asm/pgtable.h>
  25
  26#include <asm/namei.h>
  27
  28/* This can be removed after the beta phase. */
  29#define CACHE_SUPERVISE /* debug the correctness of dcache entries */
  30#undef DEBUG            /* some other debugging */
  31
  32
  33#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  34
  35/* [Feb-1997 T. Schoebel-Theuer]
  36 * Fundamental changes in the pathname lookup mechanisms (namei)
  37 * were necessary because of omirr.  The reason is that omirr needs
  38 * to know the _real_ pathname, not the user-supplied one, in case
  39 * of symlinks (and also when transname replacements occur).
  40 *
  41 * The new code replaces the old recursive symlink resolution with
  42 * an iterative one (in case of non-nested symlink chains).  It does
  43 * this with calls to <fs>_follow_link().
  44 * As a side effect, dir_namei(), _namei() and follow_link() are now 
  45 * replaced with a single function lookup_dentry() that can handle all 
  46 * the special cases of the former code.
  47 *
  48 * With the new dcache, the pathname is stored at each inode, at least as
  49 * long as the refcount of the inode is positive.  As a side effect, the
  50 * size of the dcache depends on the inode cache and thus is dynamic.
  51 *
  52 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  53 * resolution to correspond with current state of the code.
  54 *
  55 * Note that the symlink resolution is not *completely* iterative.
  56 * There is still a significant amount of tail- and mid- recursion in
  57 * the algorithm.  Also, note that <fs>_readlink() is not used in
  58 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  59 * may return different results than <fs>_follow_link().  Many virtual
  60 * filesystems (including /proc) exhibit this behavior.
  61 */
  62
  63/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  64 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  65 * and the name already exists in form of a symlink, try to create the new
  66 * name indicated by the symlink. The old code always complained that the
  67 * name already exists, due to not following the symlink even if its target
  68 * is nonexistent.  The new semantics affects also mknod() and link() when
  69 * the name is a symlink pointing to a non-existent name.
  70 *
  71 * I don't know which semantics is the right one, since I have no access
  72 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  73 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  74 * "old" one. Personally, I think the new semantics is much more logical.
  75 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  76 * file does succeed in both HP-UX and SunOS, but not in Solaris
  77 * and in the old Linux semantics.
  78 */
  79
  80/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  81 * semantics.  See the comments in "open_namei" and "do_link" below.
  82 *
  83 * [10-Sep-98 Alan Modra] Another symlink change.
  84 */
  85
  86/* In order to reduce some races, while at the same time doing additional
  87 * checking and hopefully speeding things up, we copy filenames to the
  88 * kernel data space before using them..
  89 *
  90 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
  91 */
  92static inline int do_getname(const char *filename, char *page)
  93{
  94        int retval;
  95        unsigned long len = PAGE_SIZE;
  96
  97        if ((unsigned long) filename >= TASK_SIZE) {
  98                if (!segment_eq(get_fs(), KERNEL_DS))
  99                        return -EFAULT;
 100        } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
 101                len = TASK_SIZE - (unsigned long) filename;
 102
 103        retval = strncpy_from_user((char *)page, filename, len);
 104        if (retval > 0) {
 105                if (retval < len)
 106                        return 0;
 107                return -ENAMETOOLONG;
 108        } else if (!retval)
 109                retval = -ENOENT;
 110        return retval;
 111}
 112
 113char * getname(const char * filename)
 114{
 115        char *tmp, *result;
 116
 117        result = ERR_PTR(-ENOMEM);
 118        tmp = __getname();
 119        if (tmp)  {
 120                int retval = do_getname(filename, tmp);
 121
 122                result = tmp;
 123                if (retval < 0) {
 124                        putname(tmp);
 125                        result = ERR_PTR(retval);
 126                }
 127        }
 128        return result;
 129}
 130
 131/*
 132 *      permission()
 133 *
 134 * is used to check for read/write/execute permissions on a file.
 135 * We use "fsuid" for this, letting us set arbitrary permissions
 136 * for filesystem access without changing the "normal" uids which
 137 * are used for other things..
 138 */
 139int vfs_permission(struct inode * inode,int mask)
 140{
 141        int mode = inode->i_mode;
 142
 143        if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
 144                 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 145                return -EROFS; /* Nobody gets write access to a read-only fs */
 146        else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
 147                return -EACCES; /* Nobody gets write access to an immutable file */
 148        else if (current->fsuid == inode->i_uid)
 149                mode >>= 6;
 150        else if (in_group_p(inode->i_gid))
 151                mode >>= 3;
 152        if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
 153                return 0;
 154        /* read and search access */
 155        if ((mask == S_IROTH) ||
 156            (S_ISDIR(inode->i_mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
 157                if (capable(CAP_DAC_READ_SEARCH))
 158                        return 0;
 159        return -EACCES;
 160}
 161
 162int permission(struct inode * inode,int mask)
 163{
 164        if (inode->i_op && inode->i_op->permission)
 165                return inode->i_op->permission(inode, mask);
 166        return vfs_permission(inode, mask);
 167}
 168
 169/*
 170 * get_write_access() gets write permission for a file.
 171 * put_write_access() releases this write permission.
 172 * This is used for regular files.
 173 * We cannot support write (and maybe mmap read-write shared) accesses and
 174 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 175 * can have the following values:
 176 * 0: no writers, no VM_DENYWRITE mappings
 177 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 178 * > 0: (i_writecount) users are writing to the file.
 179 */
 180int get_write_access(struct inode * inode)
 181{
 182        if (inode->i_writecount < 0)
 183                return -ETXTBSY;
 184        inode->i_writecount++;
 185        return 0;
 186}
 187
 188void put_write_access(struct inode * inode)
 189{
 190        inode->i_writecount--;
 191}
 192
 193/*
 194 * "." and ".." are special - ".." especially so because it has to be able
 195 * to know about the current root directory and parent relationships
 196 */
 197static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name)
 198{
 199        struct dentry *result = NULL;
 200        if (name->name[0] == '.') {
 201                switch (name->len) {
 202                default:
 203                        break;
 204                case 2: 
 205                        if (name->name[1] != '.')
 206                                break;
 207
 208                        if (parent != current->fs->root)
 209                                parent = parent->d_covers->d_parent;
 210                        /* fallthrough */
 211                case 1:
 212                        result = parent;
 213                }
 214        }
 215        return dget(result);
 216}
 217
 218/*
 219 * Internal lookup() using the new generic dcache.
 220 */
 221static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 222{
 223        struct dentry * dentry = d_lookup(parent, name);
 224
 225        if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 226                if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 227                        dput(dentry);
 228                        dentry = NULL;
 229                }
 230        }
 231        return dentry;
 232}
 233
 234/*
 235 * This is called when everything else fails, and we actually have
 236 * to go to the low-level filesystem to find out what we should do..
 237 *
 238 * We get the directory semaphore, and after getting that we also
 239 * make sure that nobody added the entry to the dcache in the meantime..
 240 */
 241static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 242{
 243        struct dentry * result;
 244        struct inode *dir = parent->d_inode;
 245
 246        down(&dir->i_sem);
 247        /*
 248         * First re-do the cached lookup just in case it was created
 249         * while we waited for the directory semaphore..
 250         *
 251         * FIXME! This could use version numbering or similar to
 252         * avoid unnecessary cache lookups.
 253         */
 254        result = d_lookup(parent, name);
 255        if (!result) {
 256                struct dentry * dentry = d_alloc(parent, name);
 257                result = ERR_PTR(-ENOMEM);
 258                if (dentry) {
 259                        result = dir->i_op->lookup(dir, dentry);
 260                        if (result)
 261                                dput(dentry);
 262                        else
 263                                result = dentry;
 264                }
 265                up(&dir->i_sem);
 266                return result;
 267        }
 268
 269        /*
 270         * Uhhuh! Nasty case: the cache was re-populated while
 271         * we waited on the semaphore. Need to revalidate, but
 272         * we're going to return this entry regardless (same
 273         * as if it was busy).
 274         */
 275        up(&dir->i_sem);
 276        if (result->d_op && result->d_op->d_revalidate)
 277                result->d_op->d_revalidate(result, flags);
 278        return result;
 279}
 280/*
 281 * Yes, this really increments the link_count by 5, and
 282 * decrements it by 4. Together with checking against 25,
 283 * this limits recursive symlink follows to 5, while
 284 * limiting consecutive symlinks to 25.
 285 *
 286 * Without that kind of total limit, nasty chains of consecutive
 287 * symlinks can cause almost arbitrarily long lookups.
 288 */
 289
 290static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow)
 291{
 292        struct inode * inode = dentry->d_inode;
 293
 294        if ((follow & LOOKUP_FOLLOW)
 295            && inode && inode->i_op && inode->i_op->follow_link) {
 296                if (current->link_count < 25) {
 297                        struct dentry * result;
 298
 299                        if (current->need_resched) {
 300                                current->state = TASK_RUNNING;  
 301                                schedule();
 302                        }
 303                        current->link_count += 5;
 304                        /* This eats the base */
 305                        result = inode->i_op->follow_link(dentry, base, follow|LOOKUP_INSYMLINK);
 306                        current->link_count -= 4;
 307                        dput(dentry);
 308                        return result;
 309                }
 310                dput(dentry);
 311                dentry = ERR_PTR(-ELOOP);
 312        }
 313        dput(base);
 314        return dentry;
 315}
 316
 317static inline struct dentry * follow_mount(struct dentry * dentry)
 318{
 319        struct dentry * mnt = dentry->d_mounts;
 320
 321        if (mnt != dentry) {
 322                dget(mnt);
 323                dput(dentry);
 324                dentry = mnt;
 325        }
 326        return dentry;
 327}
 328
  329/*
  330 * Name resolution.
  331 *
  332 * This is the basic name resolution function, turning a pathname
  333 * into the final dentry.
      *
      * 'name' is walked one component at a time.  The walk starts at
      * current->fs->root when 'name' begins with '/', at 'base' when one
      * is supplied, and at current->fs->pwd otherwise.  The reference on
      * 'base' is consumed: it is either returned, released with dput(),
      * or handed to do_follow_link().
      *
      * Returns the resulting dentry or an ERR_PTR().  When the final
      * component is looked up without LOOKUP_DIRECTORY the inode is not
      * checked here, so the returned dentry may have no inode (__namei()
      * tests for that itself).
  334 */
  335struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags)
  336{
  337        struct dentry * dentry;
  338        struct inode *inode;
  339
             /* A fresh top-level lookup restarts the symlink accounting. */
  340        if (!(lookup_flags & LOOKUP_INSYMLINK))
  341                current->link_count=0;
  342        if (*name == '/') {
                     /* Absolute path: any supplied base is dropped. */
  343                if (base)
  344                        dput(base);
  345                do {
  346                        name++;
  347                } while (*name == '/');
                     /*
                      * NOTE(review): __prefix_lookup_dentry is not defined in
                      * this file (presumably it comes from <asm/namei.h>);
                      * confirm whether it can touch 'base' or return early
                      * before reordering anything around it.
                      */
  348                __prefix_lookup_dentry(name, lookup_flags);
  349                base = dget(current->fs->root);
  350        } else if (!base) {
  351                base = dget(current->fs->pwd);
  352        }
  353
             /* Nothing left after the leading slashes: the base is the answer. */
  354        if (!*name)
  355                goto return_base;
  356
  357        inode = base->d_inode;
             /* Only these three flags are carried into the component walk. */
  358        lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK;
  359
  360        /* At this point we know we have a real path component. */
  361        for(;;) {
  362                int err;
  363                unsigned long hash;
  364                struct qstr this;
  365                unsigned int flags;
  366                unsigned int c;
  367
                     /* MAY_EXEC is required on the inode we are about to look in. */
  368                err = permission(inode, MAY_EXEC);
  369                dentry = ERR_PTR(err);
  370                if (err)
  371                        break;
  372
                     /* Hash the component up to the next '/' or the end of the string. */
  373                this.name = name;
  374                c = *(const unsigned char *)name;
  375
  376                hash = init_name_hash();
  377                do {
  378                        name++;
  379                        hash = partial_name_hash(c, hash);
  380                        c = *(const unsigned char *)name;
  381                } while (c && (c != '/'));
  382                this.len = name - (const char *) this.name;
  383                this.hash = end_name_hash(hash);
  384
  385                /* remove trailing slashes? */
  386                flags = lookup_flags;
  387                if (c) {
  388                        char tmp;
  389
                             /*
                              * A '/' follows this component, so it must be a
                              * directory and links are followed; LOOKUP_CONTINUE
                              * is added only if more name text comes after the
                              * run of slashes.
                              */
  390                        flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
  391                        do {
  392                                tmp = *++name;
  393                        } while (tmp == '/');
  394                        if (tmp)
  395                                flags |= LOOKUP_CONTINUE;
  396                }
  397
  398                /*
  399                 * See if the low-level filesystem might want
  400                 * to use its own hash..
  401                 */
  402                if (base->d_op && base->d_op->d_hash) {
  403                        int error;
  404                        error = base->d_op->d_hash(base, &this);
  405                        if (error < 0) {
  406                                dentry = ERR_PTR(error);
  407                                break;
  408                        }
  409                }
  410
  411                /* This does the actual lookups.. */
  412                dentry = reserved_lookup(base, &this);
  413                if (!dentry) {
  414                        dentry = cached_lookup(base, &this, flags);
  415                        if (!dentry) {
  416                                dentry = real_lookup(base, &this, flags);
  417                                if (IS_ERR(dentry))
  418                                        break;
  419                        }
  420                }
  421
  422                /* Check mountpoints.. */
  423                dentry = follow_mount(dentry);
  424
                     /*
                      * do_follow_link() disposes of the old base (dput, or
                      * passed on to the follow_link operation); from here on
                      * 'base' is the dentry this component resolved to.
                      */
  425                base = do_follow_link(base, dentry, flags);
  426                if (IS_ERR(base))
  427                        goto return_base;
  428
  429                inode = base->d_inode;
  430                if (flags & LOOKUP_DIRECTORY) {
  431                        if (!inode)
  432                                goto no_inode;
  433                        dentry = ERR_PTR(-ENOTDIR); 
  434                        if (!inode->i_op || !inode->i_op->lookup)
  435                                break;
  436                        if (flags & LOOKUP_CONTINUE)
  437                                continue;
  438                }
             /* Also reached from before the loop and from no_inode below. */
  439return_base:
  440                return base;
  441/*
  442 * The case of a nonexisting file is special.
  443 *
  444 * In the middle of a pathname lookup (ie when
  445 * LOOKUP_CONTINUE is set), it's an obvious
  446 * error and returns ENOENT.
  447 *
  448 * At the end of a pathname lookup it's legal,
  449 * and we return a negative dentry. However, we
  450 * get here only if there were trailing slashes,
  451 * which is legal only if we know it's supposed
  452 * to be a directory (ie "mkdir"). Thus the
  453 * LOOKUP_SLASHOK flag.
  454 */
  455no_inode:
  456                dentry = ERR_PTR(-ENOENT);
  457                if (flags & LOOKUP_CONTINUE)
  458                        break;
  459                if (flags & LOOKUP_SLASHOK)
  460                        goto return_base;
  461                break;
  462        }
             /* Every break above leaves an ERR_PTR() in 'dentry'. */
  463        dput(base);
  464        return dentry;
  465}
 466
 467/*
 468 *      namei()
 469 *
 470 * is used by most simple commands to get the inode of a specified name.
 471 * Open, link etc use their own routines, but this is enough for things
 472 * like 'chmod' etc.
 473 *
 474 * namei exists in two versions: namei/lnamei. The only difference is
 475 * that namei follows links, while lnamei does not.
 476 */
 477struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
 478{
 479        char *name;
 480        struct dentry *dentry;
 481
 482        name = getname(pathname);
 483        dentry = (struct dentry *) name;
 484        if (!IS_ERR(name)) {
 485                dentry = lookup_dentry(name, NULL, lookup_flags);
 486                putname(name);
 487                if (!IS_ERR(dentry)) {
 488                        if (!dentry->d_inode) {
 489                                dput(dentry);
 490                                dentry = ERR_PTR(-ENOENT);
 491                        }
 492                }
 493        }
 494        return dentry;
 495}
 496
 497/*
 498 * It's inline, so penalty for filesystems that don't use sticky bit is
 499 * minimal.
 500 */
 501static inline int check_sticky(struct inode *dir, struct inode *inode)
 502{
 503        if (!(dir->i_mode & S_ISVTX))
 504                return 0;
 505        if (inode->i_uid == current->fsuid)
 506                return 0;
 507        if (dir->i_uid == current->fsuid)
 508                return 0;
 509        return !capable(CAP_FOWNER);
 510}
 511
 512/*
 513 *      Check whether we can remove a link victim from directory dir, check
 514 *  whether the type of victim is right.
 515 *  1. We can't do it if dir is read-only (done in permission())
 516 *  2. We should have write and exec permissions on dir
 517 *  3. We can't remove anything from append-only dir
 518 *  4. We can't do anything with immutable dir (done in permission())
 519 *  5. If the sticky bit on dir is set we should either
 520 *      a. be owner of dir, or
 521 *      b. be owner of victim, or
 522 *      c. have CAP_FOWNER capability
 523 *  6. If the victim is append-only or immutable we can't do antyhing with
 524 *     links pointing to it.
 525 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 526 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 527 *  9. We can't remove a root or mountpoint.
 528 */
 529static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 530{
 531        int error;
 532        if (!victim->d_inode || victim->d_parent->d_inode != dir)
 533                return -ENOENT;
 534        error = permission(dir,MAY_WRITE | MAY_EXEC);
 535        if (error)
 536                return error;
 537        if (IS_APPEND(dir))
 538                return -EPERM;
 539        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 540            IS_IMMUTABLE(victim->d_inode))
 541                return -EPERM;
 542        if (isdir) {
 543                if (!S_ISDIR(victim->d_inode->i_mode))
 544                        return -ENOTDIR;
 545                if (IS_ROOT(victim))
 546                        return -EBUSY;
 547                if (victim->d_mounts != victim->d_covers)
 548                        return -EBUSY;
 549        } else if (S_ISDIR(victim->d_inode->i_mode))
 550                return -EISDIR;
 551        return 0;
 552}
 553
 554/*      Check whether we can create an object with dentry child in directory
 555 *  dir.
 556 *  1. We can't do it if child already exists (open has special treatment for
 557 *     this case, but since we are inlined it's OK)
 558 *  2. We can't do it if dir is read-only (done in permission())
 559 *  3. We should have write and exec permissions on dir
 560 *  4. We can't do it if dir is immutable (done in permission())
 561 */
 562static inline int may_create(struct inode *dir, struct dentry *child) {
 563        if (child->d_inode)
 564                return -EEXIST;
 565        return permission(dir,MAY_WRITE | MAY_EXEC);
 566}
 567
 568static inline struct dentry *get_parent(struct dentry *dentry)
 569{
 570        return dget(dentry->d_parent);
 571}
 572
 573static inline void unlock_dir(struct dentry *dir)
 574{
 575        up(&dir->d_inode->i_sem);
 576        dput(dir);
 577}
 578
 579/*
 580 * We need to do a check-parent every time
 581 * after we have locked the parent - to verify
 582 * that the parent is still our parent and
 583 * that we are still hashed onto it..
 584 *
 585 * This is requied in case two processes race
 586 * on removing (or moving) the same entry: the
 587 * parent lock will serialize them, but the
 588 * other process will be too late..
 589 */
 590#define check_parent(dir, dentry) \
 591        ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
 592
 593/*
 594 * Locking the parent is needed to:
 595 *  - serialize directory operations
 596 *  - make sure the parent doesn't change from
 597 *    under us in the middle of an operation.
 598 *
 599 * NOTE! Right now we'd rather use a "struct inode"
 600 * for this, but as I expect things to move toward
 601 * using dentries instead for most things it is
 602 * probably better to start with the conceptually
 603 * better interface of relying on a path of dentries.
 604 */
 605static inline struct dentry *lock_parent(struct dentry *dentry)
 606{
 607        struct dentry *dir = dget(dentry->d_parent);
 608
 609        down(&dir->d_inode->i_sem);
 610        return dir;
 611}
 612
 613/*
 614 * Whee.. Deadlock country. Happily there are only two VFS
 615 * operations that do this..
 616 */
 617static inline void double_lock(struct dentry *d1, struct dentry *d2)
 618{
 619        struct semaphore *s1 = &d1->d_inode->i_sem;
 620        struct semaphore *s2 = &d2->d_inode->i_sem;
 621
 622        if (s1 != s2) {
 623                if ((unsigned long) s1 < (unsigned long) s2) {
 624                        struct semaphore *tmp = s2;
 625                        s2 = s1; s1 = tmp;
 626                }
 627                down(s1);
 628        }
 629        down(s2);
 630}
 631
 632static inline void double_unlock(struct dentry *d1, struct dentry *d2)
 633{
 634        struct semaphore *s1 = &d1->d_inode->i_sem;
 635        struct semaphore *s2 = &d2->d_inode->i_sem;
 636
 637        up(s1);
 638        if (s1 != s2)
 639                up(s2);
 640        dput(d1);
 641        dput(d2);
 642}
 643
 644
 645/* 
 646 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 647 * reasons.
 648 *
 649 * O_DIRECTORY translates into forcing a directory lookup.
 650 */
 651static inline int lookup_flags(unsigned int f)
 652{
 653        unsigned long retval = LOOKUP_FOLLOW;
 654
 655        if (f & O_NOFOLLOW)
 656                retval &= ~LOOKUP_FOLLOW;
 657        
 658        if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 659                retval &= ~LOOKUP_FOLLOW;
 660        
 661        if (f & O_DIRECTORY)
 662                retval |= LOOKUP_DIRECTORY;
 663        
 664        return retval;
 665}
 666
 667/*
 668 *      open_namei()
 669 *
 670 * namei for open - this is in fact almost the whole open-routine.
 671 *
 672 * Note that the low bits of "flag" aren't the same as in the open
 673 * system call - they are 00 - no permissions needed
 674 *                        01 - read permission needed
 675 *                        10 - write permission needed
 676 *                        11 - read/write permissions needed
 677 * which is a lot more logical, and also allows the "no perm" needed
 678 * for symlinks (where the permissions are checked later).
 679 */
 680struct dentry * open_namei(const char * pathname, int flag, int mode)
 681{
 682        int acc_mode, error;
 683        struct inode *inode;
 684        struct dentry *dentry;
 685
 686        mode &= S_IALLUGO & ~current->fs->umask;
 687        mode |= S_IFREG;
 688
 689        dentry = lookup_dentry(pathname, NULL, lookup_flags(flag));
 690        if (IS_ERR(dentry))
 691                return dentry;
 692
 693        acc_mode = ACC_MODE(flag);
 694        if (flag & O_CREAT) {
 695                struct dentry *dir;
 696
 697                if (dentry->d_inode) {
 698                        if (!(flag & O_EXCL))
 699                                goto nocreate;
 700                        error = -EEXIST;
 701                        goto exit;
 702                }
 703
 704                dir = lock_parent(dentry);
 705                if (!check_parent(dir, dentry)) {
 706                        /*
 707                         * Really nasty race happened. What's the 
 708                         * right error code? We had a dentry, but
 709                         * before we could use it it was removed
 710                         * by somebody else. We could just re-try
 711                         * everything, I guess.
 712                         *
 713                         * ENOENT is definitely wrong.
 714                         */
 715                        error = -ENOENT;
 716                        unlock_dir(dir);
 717                        goto exit;
 718                }
 719
 720                /*
 721                 * Somebody might have created the file while we
 722                 * waited for the directory lock.. So we have to
 723                 * re-do the existence test.
 724                 */
 725                if (dentry->d_inode) {
 726                        error = 0;
 727                        if (flag & O_EXCL)
 728                                error = -EEXIST;
 729                } else if ((error = may_create(dir->d_inode, dentry)) == 0) {
 730                        if (!dir->d_inode->i_op || !dir->d_inode->i_op->create)
 731                                error = -EACCES;
 732                        else {
 733                                DQUOT_INIT(dir->d_inode);
 734                                error = dir->d_inode->i_op->create(dir->d_inode, dentry, mode);
 735                                /* Don't check for write permission, don't truncate */
 736                                acc_mode = 0;
 737                                flag &= ~O_TRUNC;
 738                        }
 739                }
 740                unlock_dir(dir);
 741                if (error)
 742                        goto exit;
 743        }
 744
 745nocreate:
 746        error = -ENOENT;
 747        inode = dentry->d_inode;
 748        if (!inode)
 749                goto exit;
 750
 751        error = -ELOOP;
 752        if (S_ISLNK(inode->i_mode))
 753                goto exit;
 754        
 755        error = -EISDIR;
 756        if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 757                goto exit;
 758
 759        error = permission(inode,acc_mode);
 760        if (error)
 761                goto exit;
 762
 763        /*
 764         * FIFO's, sockets and device files are special: they don't
 765         * actually live on the filesystem itself, and as such you
 766         * can write to them even if the filesystem is read-only.
 767         */
 768        if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 769                flag &= ~O_TRUNC;
 770        } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
 771                error = -EACCES;
 772                if (IS_NODEV(inode))
 773                        goto exit;
 774
 775                flag &= ~O_TRUNC;
 776        } else {
 777                error = -EROFS;
 778                if (IS_RDONLY(inode) && (flag & 2))
 779                        goto exit;
 780        }
 781        /*
 782         * An append-only file must be opened in append mode for writing.
 783         */
 784        error = -EPERM;
 785        if (IS_APPEND(inode)) {
 786                if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
 787                        goto exit;
 788                if (flag & O_TRUNC)
 789                        goto exit;
 790        }
 791
 792        if (flag & O_TRUNC) {
 793                error = get_write_access(inode);
 794                if (error)
 795                        goto exit;
 796
 797                /*
 798                 * Refuse to truncate files with mandatory locks held on them.
 799                 */
 800                error = locks_verify_locked(inode);
 801                if (!error) {
 802                        DQUOT_INIT(inode);
 803                        
 804                        error = do_truncate(dentry, 0);
 805                }
 806                put_write_access(inode);
 807                if (error)
 808                        goto exit;
 809        } else
 810                if (flag & FMODE_WRITE)
 811                        DQUOT_INIT(inode);
 812
 813        return dentry;
 814
 815exit:
 816        dput(dentry);
 817        return ERR_PTR(error);
 818}
 819
 820struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
 821{
 822        int error;
 823        struct dentry *dir;
 824        struct dentry *dentry, *retval;
 825
 826        mode &= ~current->fs->umask;
 827        dentry = lookup_dentry(filename, NULL, 0);
 828        if (IS_ERR(dentry))
 829                return dentry;
 830
 831        dir = lock_parent(dentry);
 832        error = -ENOENT;
 833        if (!check_parent(dir, dentry))
 834                goto exit_lock;
 835
 836        error = may_create(dir->d_inode, dentry);
 837        if (error)
 838                goto exit_lock;
 839
 840        error = -EPERM;
 841        if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod)
 842                goto exit_lock;
 843
 844        DQUOT_INIT(dir->d_inode);
 845        error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev);
 846exit_lock:
 847        retval = ERR_PTR(error);
 848        if (!error)
 849                retval = dget(dentry);
 850        unlock_dir(dir);
 851        dput(dentry);
 852        return retval;
 853}
 854
 855asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev)
 856{
 857        int error;
 858        char * tmp;
 859
 860        lock_kernel();
 861        error = -EPERM;
 862        if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_SYS_ADMIN)))
 863                goto out;
 864        error = -EINVAL;
 865        switch (mode & S_IFMT) {
 866        case 0:
 867                mode |= S_IFREG;
 868                break;
 869        case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
 870                break;
 871        default:
 872                goto out;
 873        }
 874        tmp = getname(filename);
 875        error = PTR_ERR(tmp);
 876        if (!IS_ERR(tmp)) {
 877                struct dentry * dentry = do_mknod(tmp,mode,dev);
 878                putname(tmp);
 879                error = PTR_ERR(dentry);
 880                if (!IS_ERR(dentry)) {
 881                        dput(dentry);
 882                        error = 0;
 883                }
 884        }
 885out:
 886        unlock_kernel();
 887        return error;
 888}
 889
 890/*
 891 * Look out: this function may change a normal dentry
 892 * into a directory dentry (different size)..
 893 */
 894static inline int do_mkdir(const char * pathname, int mode)
 895{
 896        int error;
 897        struct dentry *dir;
 898        struct dentry *dentry;
 899
 900        dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK);
 901        error = PTR_ERR(dentry);
 902        if (IS_ERR(dentry))
 903                goto exit;
 904
 905        /*
 906         * EEXIST is kind of a strange error code to
 907         * return, but basically if the dentry was moved
 908         * or unlinked while we locked the parent, we
 909         * do know that it _did_ exist before, and as
 910         * such it makes perfect sense.. In contrast,
 911         * ENOENT doesn't make sense for mkdir.
 912         */
 913        dir = lock_parent(dentry);
 914        error = -EEXIST;
 915        if (!check_parent(dir, dentry))
 916                goto exit_lock;
 917
 918        error = may_create(dir->d_inode, dentry);
 919        if (error)
 920                goto exit_lock;
 921
 922        error = -EPERM;
 923        if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir)
 924                goto exit_lock;
 925
 926        DQUOT_INIT(dir->d_inode);
 927        mode &= 0777 & ~current->fs->umask;
 928        error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode);
 929
 930exit_lock:
 931        unlock_dir(dir);
 932        dput(dentry);
 933exit:
 934        return error;
 935}
 936
 937asmlinkage int sys_mkdir(const char * pathname, int mode)
 938{
 939        int error;
 940        char * tmp;
 941
 942        lock_kernel();
 943        tmp = getname(pathname);
 944        error = PTR_ERR(tmp);
 945        if (!IS_ERR(tmp)) {
 946                error = do_mkdir(tmp,mode);
 947                putname(tmp);
 948        }
 949        unlock_kernel();
 950        return error;
 951}
 952
 953int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 954{
 955        int error;
 956
 957        error = may_delete(dir, dentry, 1);
 958        if (error)
 959                return error;
 960
 961        if (!dir->i_op || !dir->i_op->rmdir)
 962                return -EPERM;
 963
 964        DQUOT_INIT(dir);
 965
 966        /*
 967         * We try to drop the dentry early: we should have
 968         * a usage count of 2 if we're the only user of this
 969         * dentry, and if that is true (possibly after pruning
 970         * the dcache), then we drop the dentry now.
 971         *
 972         * A low-level filesystem can, if it choses, legally
 973         * do a
 974         *
 975         *      if (!list_empty(&dentry->d_hash))
 976         *              return -EBUSY;
 977         *
 978         * if it cannot handle the case of removing a directory
 979         * that is still in use by something else..
 980         */
 981        switch (dentry->d_count) {
 982        default:
 983                shrink_dcache_parent(dentry);
 984                if (dentry->d_count != 2)
 985                        break;
 986        case 2:
 987                d_drop(dentry);
 988        }
 989
 990        error = dir->i_op->rmdir(dir, dentry);
 991
 992        return error;
 993}
 994
 995static inline int do_rmdir(const char * name)
 996{
 997        int error;
 998        struct dentry *dir;
 999        struct dentry *dentry;
1000
1001        dentry = lookup_dentry(name, NULL, 0);
1002        error = PTR_ERR(dentry);
1003        if (IS_ERR(dentry))
1004                goto exit;
1005
1006        error = -ENOENT;
1007        if (!dentry->d_inode)
1008                goto exit_dput;
1009
1010        dir = dget(dentry->d_parent);
1011
1012        /*
1013         * The dentry->d_count stuff confuses d_delete() enough to
1014         * not kill the inode from under us while it is locked. This
1015         * wouldn't be needed, except the dentry semaphore is really
1016         * in the inode, not in the dentry..
1017         */
1018        dentry->d_count++;
1019        double_lock(dir, dentry);
1020
1021        error = -ENOENT;
1022        if (check_parent(dir, dentry))
1023                error = vfs_rmdir(dir->d_inode, dentry);
1024
1025        double_unlock(dentry, dir);
1026exit_dput:
1027        dput(dentry);
1028exit:
1029        return error;
1030}
1031
1032asmlinkage int sys_rmdir(const char * pathname)
1033{
1034        int error;
1035        char * tmp;
1036
1037        lock_kernel();
1038        tmp = getname(pathname);
1039        error = PTR_ERR(tmp);
1040        if (!IS_ERR(tmp)) {
1041                error = do_rmdir(tmp);
1042                putname(tmp);
1043        }
1044        unlock_kernel();
1045        return error;
1046}
1047
1048int vfs_unlink(struct inode *dir, struct dentry *dentry)
1049{
1050        int error;
1051
1052        error = may_delete(dir, dentry, 0);
1053        if (!error) {
1054                error = -EPERM;
1055                if (dir->i_op && dir->i_op->unlink) {
1056                        DQUOT_INIT(dir);
1057                        error = dir->i_op->unlink(dir, dentry);
1058                }
1059        }
1060        return error;
1061}
1062
1063static inline int do_unlink(const char * name)
1064{
1065        int error;
1066        struct dentry *dir;
1067        struct dentry *dentry;
1068
1069        dentry = lookup_dentry(name, NULL, 0);
1070        error = PTR_ERR(dentry);
1071        if (IS_ERR(dentry))
1072                goto exit;
1073
1074        dir = lock_parent(dentry);
1075        error = -ENOENT;
1076        if (check_parent(dir, dentry))
1077                error = vfs_unlink(dir->d_inode, dentry);
1078
1079        unlock_dir(dir);
1080        dput(dentry);
1081exit:
1082        return error;
1083}
1084
1085asmlinkage int sys_unlink(const char * pathname)
1086{
1087        int error;
1088        char * tmp;
1089
1090        lock_kernel();
1091        tmp = getname(pathname);
1092        error = PTR_ERR(tmp);
1093        if (!IS_ERR(tmp)) {
1094                error = do_unlink(tmp);
1095                putname(tmp);
1096        }
1097        unlock_kernel();
1098        return error;
1099}
1100
1101static inline int do_symlink(const char * oldname, const char * newname)
1102{
1103        int error;
1104        struct dentry *dir;
1105        struct dentry *dentry;
1106
1107        dentry = lookup_dentry(newname, NULL, 0);
1108
1109        error = PTR_ERR(dentry);
1110        if (IS_ERR(dentry))
1111                goto exit;
1112
1113        dir = lock_parent(dentry);
1114        error = -ENOENT;
1115        if (!check_parent(dir, dentry))
1116                goto exit_lock;
1117
1118        error = may_create(dir->d_inode, dentry);
1119        if (error)
1120                goto exit_lock;
1121
1122        error = -EPERM;
1123        if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink)
1124                goto exit_lock;
1125
1126        DQUOT_INIT(dir->d_inode);
1127        error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname);
1128
1129exit_lock:
1130        unlock_dir(dir);
1131        dput(dentry);
1132exit:
1133        return error;
1134}
1135
1136asmlinkage int sys_symlink(const char * oldname, const char * newname)
1137{
1138        int error;
1139        char * from;
1140
1141        lock_kernel();
1142        from = getname(oldname);
1143        error = PTR_ERR(from);
1144        if (!IS_ERR(from)) {
1145                char * to;
1146                to = getname(newname);
1147                error = PTR_ERR(to);
1148                if (!IS_ERR(to)) {
1149                        error = do_symlink(from,to);
1150                        putname(to);
1151                }
1152                putname(from);
1153        }
1154        unlock_kernel();
1155        return error;
1156}
1157
1158static inline int do_link(const char * oldname, const char * newname)
1159{
1160        struct dentry *old_dentry, *new_dentry, *dir;
1161        struct inode *inode;
1162        int error;
1163
1164        /*
1165         * Hardlinks are often used in delicate situations.  We avoid
1166         * security-related surprises by not following symlinks on the
1167         * newname.  --KAB
1168         *
1169         * We don't follow them on the oldname either to be compatible
1170         * with linux 2.0, and to avoid hard-linking to directories
1171         * and other special files.  --ADM
1172         */
1173        old_dentry = lookup_dentry(oldname, NULL, 0);
1174        error = PTR_ERR(old_dentry);
1175        if (IS_ERR(old_dentry))
1176                goto exit;
1177
1178        new_dentry = lookup_dentry(newname, NULL, 0);
1179        error = PTR_ERR(new_dentry);
1180        if (IS_ERR(new_dentry))
1181                goto exit_old;
1182
1183        dir = lock_parent(new_dentry);
1184        error = -ENOENT;
1185        if (!check_parent(dir, new_dentry))
1186                goto exit_lock;
1187
1188        error = -ENOENT;
1189        inode = old_dentry->d_inode;
1190        if (!inode)
1191                goto exit_lock;
1192
1193        error = may_create(dir->d_inode, new_dentry);
1194        if (error)
1195                goto exit_lock;
1196
1197        error = -EXDEV;
1198        if (dir->d_inode->i_dev != inode->i_dev)
1199                goto exit_lock;
1200
1201        /*
1202         * A link to an append-only or immutable file cannot be created.
1203         */
1204        error = -EPERM;
1205        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1206                goto exit_lock;
1207
1208        error = -EPERM;
1209        if (!dir->d_inode->i_op || !dir->d_inode->i_op->link)
1210                goto exit_lock;
1211
1212        DQUOT_INIT(dir->d_inode);
1213        error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry);
1214
1215exit_lock:
1216        unlock_dir(dir);
1217        dput(new_dentry);
1218exit_old:
1219        dput(old_dentry);
1220exit:
1221        return error;
1222}
1223
1224asmlinkage int sys_link(const char * oldname, const char * newname)
1225{
1226        int error;
1227        char * from;
1228
1229        lock_kernel();
1230        from = getname(oldname);
1231        error = PTR_ERR(from);
1232        if (!IS_ERR(from)) {
1233                char * to;
1234                to = getname(newname);
1235                error = PTR_ERR(to);
1236                if (!IS_ERR(to)) {
1237                        error = do_link(from,to);
1238                        putname(to);
1239                }
1240                putname(from);
1241        }
1242        unlock_kernel();
1243        return error;
1244}
1245
1246int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1247               struct inode *new_dir, struct dentry *new_dentry)
1248{
1249        int error;
1250        int need_rehash = 0;
1251
1252        if (old_dentry->d_inode == new_dentry->d_inode)
1253                return 0;
1254
1255        error = may_delete(old_dir, old_dentry, 1);
1256        if (error)
1257                return error;
1258
1259        if (new_dir->i_dev != old_dir->i_dev)
1260                return -EXDEV;
1261
1262        if (!new_dentry->d_inode)
1263                error = may_create(new_dir, new_dentry);
1264        else
1265                error = may_delete(new_dir, new_dentry, 1);
1266        if (error)
1267                return error;
1268
1269        if (!old_dir->i_op || !old_dir->i_op->rename)
1270                return -EPERM;
1271
1272        /*
1273         * If we are going to change the parent - check write permissions,
1274         * we'll need to flip '..'.
1275         */
1276        if (new_dir != old_dir) {
1277                error = permission(old_dentry->d_inode, MAY_WRITE);
1278        }
1279        if (error)
1280                return error;
1281
1282        DQUOT_INIT(old_dir);
1283        DQUOT_INIT(new_dir);
1284        down(&old_dir->i_sb->s_vfs_rename_sem);
1285        error = -EINVAL;
1286        if (is_subdir(new_dentry, old_dentry))
1287                goto out_unlock;
1288        if (new_dentry->d_inode) {
1289                error = -EBUSY;
1290                if (d_invalidate(new_dentry)<0)
1291                        goto out_unlock;
1292                need_rehash = 1;
1293        }
1294        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1295        if (need_rehash)
1296                d_rehash(new_dentry);
1297        if (!error)
1298                d_move(old_dentry,new_dentry);
1299out_unlock:
1300        up(&old_dir->i_sb->s_vfs_rename_sem);
1301        return error;
1302}
1303
1304int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1305               struct inode *new_dir, struct dentry *new_dentry)
1306{
1307        int error;
1308
1309        if (old_dentry->d_inode == new_dentry->d_inode)
1310                return 0;
1311
1312        error = may_delete(old_dir, old_dentry, 0);
1313        if (error)
1314                return error;
1315
1316        if (new_dir->i_dev != old_dir->i_dev)
1317                return -EXDEV;
1318
1319        if (!new_dentry->d_inode)
1320                error = may_create(new_dir, new_dentry);
1321        else
1322                error = may_delete(new_dir, new_dentry, 0);
1323        if (error)
1324                return error;
1325
1326        if (!old_dir->i_op || !old_dir->i_op->rename)
1327                return -EPERM;
1328
1329        DQUOT_INIT(old_dir);
1330        DQUOT_INIT(new_dir);
1331        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1332        if (error)
1333                return error;
1334        /* The following d_move() should become unconditional */
1335        if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1336                d_move(old_dentry, new_dentry);
1337        }
1338        return 0;
1339}
1340
1341int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1342               struct inode *new_dir, struct dentry *new_dentry)
1343{
1344        if (S_ISDIR(old_dentry->d_inode->i_mode))
1345                return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1346        else
1347                return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1348}
1349
1350static inline int do_rename(const char * oldname, const char * newname)
1351{
1352        int error;
1353        struct dentry * old_dir, * new_dir;
1354        struct dentry * old_dentry, *new_dentry;
1355
1356        old_dentry = lookup_dentry(oldname, NULL, 0);
1357
1358        error = PTR_ERR(old_dentry);
1359        if (IS_ERR(old_dentry))
1360                goto exit;
1361
1362        error = -ENOENT;
1363        if (!old_dentry->d_inode)
1364                goto exit_old;
1365
1366        {
1367                unsigned int flags = 0;
1368                if (S_ISDIR(old_dentry->d_inode->i_mode))
1369                        flags = LOOKUP_SLASHOK;
1370                new_dentry = lookup_dentry(newname, NULL, flags);
1371        }
1372
1373        error = PTR_ERR(new_dentry);
1374        if (IS_ERR(new_dentry))
1375                goto exit_old;
1376
1377        new_dir = get_parent(new_dentry);
1378        old_dir = get_parent(old_dentry);
1379
1380        double_lock(new_dir, old_dir);
1381
1382        error = -ENOENT;
1383        if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
1384                error = vfs_rename(old_dir->d_inode, old_dentry,
1385                                   new_dir->d_inode, new_dentry);
1386
1387        double_unlock(new_dir, old_dir);
1388        dput(new_dentry);
1389exit_old:
1390        dput(old_dentry);
1391exit:
1392        return error;
1393}
1394
1395asmlinkage int sys_rename(const char * oldname, const char * newname)
1396{
1397        int error;
1398        char * from;
1399
1400        lock_kernel();
1401        from = getname(oldname);
1402        error = PTR_ERR(from);
1403        if (!IS_ERR(from)) {
1404                char * to;
1405                to = getname(newname);
1406                error = PTR_ERR(to);
1407                if (!IS_ERR(to)) {
1408                        error = do_rename(from,to);
1409                        putname(to);
1410                }
1411                putname(from);
1412        }
1413        unlock_kernel();
1414        return error;
1415}
1416
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.