linux-old/fs/super.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/super.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *
   6 *  super.c contains code to handle: - mount structures
   7 *                                   - super-block tables.
   8 *                                   - mount system call
   9 *                                   - umount system call
  10 *
  11 *  Added options to /proc/mounts
  12 *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
  13 *
  14 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
  15 *
  16 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
  17 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
  18 */
  19
  20#include <linux/config.h>
  21#include <linux/malloc.h>
  22#include <linux/locks.h>
  23#include <linux/smp_lock.h>
  24#include <linux/fd.h>
  25#include <linux/init.h>
  26#include <linux/quotaops.h>
  27#include <linux/acct.h>
  28
  29#include <asm/uaccess.h>
  30
  31#include <linux/nfs_fs.h>
  32#include <linux/nfs_fs_sb.h>
  33#include <linux/nfs_mount.h>
  34
  35#ifdef CONFIG_KMOD
  36#include <linux/kmod.h>
  37#endif
  38
  39/*
  40 * We use a semaphore to synchronize all mount/umount
  41 * activity - imagine the mess if we have a race between
  42 * unmounting a filesystem and re-mounting it (or something
  43 * else).
  44 */
  45static struct semaphore mount_sem = MUTEX;
  46
  47extern void wait_for_keypress(void);
  48extern struct file_operations * get_blkfops(unsigned int major);
  49
  50extern int root_mountflags;
  51extern void rd_load_secondary(void);
  52
  53static int do_remount_sb(struct super_block *sb, int flags, char * data);
  54
  55/* this is initialized in init/main.c */
  56kdev_t ROOT_DEV;
  57
  58int nr_super_blocks = 0;
  59int max_super_blocks = NR_SUPER;
  60LIST_HEAD(super_blocks);
  61
  62static struct file_system_type *file_systems = (struct file_system_type *) NULL;
  63struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
  64static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
  65                       *mru_vfsmnt = (struct vfsmount *) NULL;
  66
  67/* 
  68 * This part handles the management of the list of mounted filesystems.
  69 */
  70struct vfsmount *lookup_vfsmnt(kdev_t dev)
  71{
  72        struct vfsmount *lptr;
  73
  74        if (vfsmntlist == (struct vfsmount *)NULL)
  75                return ((struct vfsmount *)NULL);
  76
  77        if (mru_vfsmnt != (struct vfsmount *)NULL &&
  78            mru_vfsmnt->mnt_dev == dev)
  79                return (mru_vfsmnt);
  80
  81        for (lptr = vfsmntlist;
  82             lptr != (struct vfsmount *)NULL;
  83             lptr = lptr->mnt_next)
  84                if (lptr->mnt_dev == dev) {
  85                        mru_vfsmnt = lptr;
  86                        return (lptr);
  87                }
  88
  89        return ((struct vfsmount *)NULL);
  90        /* NOTREACHED */
  91}
  92
  93static struct vfsmount *add_vfsmnt(struct super_block *sb,
  94                        const char *dev_name, const char *dir_name)
  95{
  96        struct vfsmount *lptr;
  97        char *tmp, *name;
  98
  99        lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
 100        if (!lptr)
 101                goto out;
 102        memset(lptr, 0, sizeof(struct vfsmount));
 103
 104        lptr->mnt_sb = sb;
 105        lptr->mnt_dev = sb->s_dev;
 106        lptr->mnt_flags = sb->s_flags;
 107
 108        sema_init(&lptr->mnt_dquot.dqio_sem, 1);
 109        sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
 110        lptr->mnt_dquot.flags = 0;
 111
 112        /* N.B. Is it really OK to have a vfsmount without names? */
 113        if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
 114                name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 115                if (name) {
 116                        strcpy(name, tmp);
 117                        lptr->mnt_devname = name;
 118                }
 119                putname(tmp);
 120        }
 121        if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
 122                name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 123                if (name) {
 124                        strcpy(name, tmp);
 125                        lptr->mnt_dirname = name;
 126                }
 127                putname(tmp);
 128        }
 129
 130        if (vfsmntlist == (struct vfsmount *)NULL) {
 131                vfsmntlist = vfsmnttail = lptr;
 132        } else {
 133                vfsmnttail->mnt_next = lptr;
 134                vfsmnttail = lptr;
 135        }
 136out:
 137        return lptr;
 138}
 139
 140static void remove_vfsmnt(kdev_t dev)
 141{
 142        struct vfsmount *lptr, *tofree;
 143
 144        if (vfsmntlist == (struct vfsmount *)NULL)
 145                return;
 146        lptr = vfsmntlist;
 147        if (lptr->mnt_dev == dev) {
 148                tofree = lptr;
 149                vfsmntlist = lptr->mnt_next;
 150                if (vfsmnttail->mnt_dev == dev)
 151                        vfsmnttail = vfsmntlist;
 152        } else {
 153                while (lptr->mnt_next != (struct vfsmount *)NULL) {
 154                        if (lptr->mnt_next->mnt_dev == dev)
 155                                break;
 156                        lptr = lptr->mnt_next;
 157                }
 158                tofree = lptr->mnt_next;
 159                if (tofree == (struct vfsmount *)NULL)
 160                        return;
 161                lptr->mnt_next = lptr->mnt_next->mnt_next;
 162                if (vfsmnttail->mnt_dev == dev)
 163                        vfsmnttail = lptr;
 164        }
 165        if (tofree == mru_vfsmnt)
 166                mru_vfsmnt = NULL;
 167        kfree(tofree->mnt_devname);
 168        kfree(tofree->mnt_dirname);
 169        kfree_s(tofree, sizeof(struct vfsmount));
 170}
 171
 172int register_filesystem(struct file_system_type * fs)
 173{
 174        struct file_system_type ** tmp;
 175
 176        if (!fs)
 177                return -EINVAL;
 178        if (fs->next)
 179                return -EBUSY;
 180        tmp = &file_systems;
 181        while (*tmp) {
 182                if (strcmp((*tmp)->name, fs->name) == 0)
 183                        return -EBUSY;
 184                tmp = &(*tmp)->next;
 185        }
 186        *tmp = fs;
 187        return 0;
 188}
 189
 190int unregister_filesystem(struct file_system_type * fs)
 191{
 192#ifdef CONFIG_MODULES
 193        struct file_system_type ** tmp;
 194
 195        tmp = &file_systems;
 196        while (*tmp) {
 197                if (fs == *tmp) {
 198                        *tmp = fs->next;
 199                        fs->next = NULL;
 200                        return 0;
 201                }
 202                tmp = &(*tmp)->next;
 203        }
 204#endif
 205        return -EINVAL;
 206}
 207
 208static int fs_index(const char * __name)
 209{
 210        struct file_system_type * tmp;
 211        char * name;
 212        int err, index;
 213
 214        name = getname(__name);
 215        err = PTR_ERR(name);
 216        if (IS_ERR(name))
 217                return err;
 218
 219        index = 0;
 220        for (tmp = file_systems ; tmp ; tmp = tmp->next) {
 221                if (strcmp(tmp->name, name) == 0) {
 222                        putname(name);
 223                        return index;
 224                }
 225                index++;
 226        }
 227        putname(name);
 228        return -EINVAL;
 229}
 230
 231static int fs_name(unsigned int index, char * buf)
 232{
 233        struct file_system_type * tmp;
 234        int len;
 235
 236        tmp = file_systems;
 237        while (tmp && index > 0) {
 238                tmp = tmp->next;
 239                index--;
 240        }
 241        if (!tmp)
 242                return -EINVAL;
 243        len = strlen(tmp->name) + 1;
 244        return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
 245}
 246
 247static int fs_maxindex(void)
 248{
 249        struct file_system_type * tmp;
 250        int index;
 251
 252        index = 0;
 253        for (tmp = file_systems ; tmp ; tmp = tmp->next)
 254                index++;
 255        return index;
 256}
 257
 258/*
 259 * Whee.. Weird sysv syscall. 
 260 */
 261asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
 262{
 263        int retval = -EINVAL;
 264
 265        lock_kernel();
 266        switch (option) {
 267                case 1:
 268                        retval = fs_index((const char *) arg1);
 269                        break;
 270
 271                case 2:
 272                        retval = fs_name(arg1, (char *) arg2);
 273                        break;
 274
 275                case 3:
 276                        retval = fs_maxindex();
 277                        break;
 278        }
 279        unlock_kernel();
 280        return retval;
 281}
 282
 283static struct proc_fs_info {
 284        int flag;
 285        char *str;
 286} fs_info[] = {
 287        { MS_NOEXEC, ",noexec" },
 288        { MS_NOSUID, ",nosuid" },
 289        { MS_NODEV, ",nodev" },
 290        { MS_SYNCHRONOUS, ",sync" },
 291        { MS_MANDLOCK, ",mand" },
 292        { MS_NOATIME, ",noatime" },
 293        { MS_NODIRATIME, ",nodiratime" },
 294#ifdef MS_NOSUB                 /* Can't find this except in mount.c */
 295        { MS_NOSUB, ",nosub" },
 296#endif
 297        { 0, NULL }
 298};
 299
 300static struct proc_nfs_info {
 301        int flag;
 302        char *str;
 303} nfs_info[] = {
 304        { NFS_MOUNT_SOFT, ",soft" },
 305        { NFS_MOUNT_INTR, ",intr" },
 306        { NFS_MOUNT_POSIX, ",posix" },
 307        { NFS_MOUNT_TCP, ",tcp" },
 308        { NFS_MOUNT_NOCTO, ",nocto" },
 309        { NFS_MOUNT_NOAC, ",noac" },
 310        { NFS_MOUNT_NONLM, ",nolock" },
 311        { 0, NULL }
 312};
 313
 314int get_filesystem_info( char *buf )
 315{
 316        struct vfsmount *tmp = vfsmntlist;
 317        struct proc_fs_info *fs_infop;
 318        struct proc_nfs_info *nfs_infop;
 319        struct nfs_server *nfss;
 320        int len = 0;
 321
 322        while ( tmp && len < PAGE_SIZE - 160)
 323        {
 324                len += sprintf( buf + len, "%s %s %s %s",
 325                        tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
 326                        tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 327                for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 328                  if (tmp->mnt_flags & fs_infop->flag) {
 329                    strcpy(buf + len, fs_infop->str);
 330                    len += strlen(fs_infop->str);
 331                  }
 332                }
 333                if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 334                        nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
 335                        len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
 336
 337                        if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 338                                len += sprintf(buf+len, ",rsize=%d",
 339                                               nfss->rsize);
 340                        }
 341                        if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 342                                len += sprintf(buf+len, ",wsize=%d",
 343                                               nfss->wsize);
 344                        }
 345#if 0
 346                        if (nfss->timeo != 7*HZ/10) {
 347                                len += sprintf(buf+len, ",timeo=%d",
 348                                               nfss->timeo*10/HZ);
 349                        }
 350                        if (nfss->retrans != 3) {
 351                                len += sprintf(buf+len, ",retrans=%d",
 352                                               nfss->retrans);
 353                        }
 354#endif
 355                        if (nfss->acregmin != 3*HZ) {
 356                                len += sprintf(buf+len, ",acregmin=%d",
 357                                               nfss->acregmin/HZ);
 358                        }
 359                        if (nfss->acregmax != 60*HZ) {
 360                                len += sprintf(buf+len, ",acregmax=%d",
 361                                               nfss->acregmax/HZ);
 362                        }
 363                        if (nfss->acdirmin != 30*HZ) {
 364                                len += sprintf(buf+len, ",acdirmin=%d",
 365                                               nfss->acdirmin/HZ);
 366                        }
 367                        if (nfss->acdirmax != 60*HZ) {
 368                                len += sprintf(buf+len, ",acdirmax=%d",
 369                                               nfss->acdirmax/HZ);
 370                        }
 371                        for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 372                                if (nfss->flags & nfs_infop->flag) {
 373                                        strcpy(buf + len, nfs_infop->str);
 374                                        len += strlen(nfs_infop->str);
 375                                }
 376                        }
 377                        len += sprintf(buf+len, ",addr=%s",
 378                                       nfss->hostname);
 379                }
 380                len += sprintf( buf + len, " 0 0\n" );
 381                tmp = tmp->mnt_next;
 382        }
 383
 384        return len;
 385}
 386
 387int get_filesystem_list(char * buf)
 388{
 389        int len = 0;
 390        struct file_system_type * tmp;
 391
 392        tmp = file_systems;
 393        while (tmp && len < PAGE_SIZE - 80) {
 394                len += sprintf(buf+len, "%s\t%s\n",
 395                        (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
 396                        tmp->name);
 397                tmp = tmp->next;
 398        }
 399        return len;
 400}
 401
 402struct file_system_type *get_fs_type(const char *name)
 403{
 404        struct file_system_type * fs = file_systems;
 405        
 406        if (!name)
 407                return fs;
 408        for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 409                ;
 410#ifdef CONFIG_KMOD
 411        if (!fs && (request_module(name) == 0)) {
 412                for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 413                        ;
 414        }
 415#endif
 416
 417        return fs;
 418}
 419
 420void __wait_on_super(struct super_block * sb)
 421{
 422        struct wait_queue wait = { current, NULL };
 423
 424        add_wait_queue(&sb->s_wait, &wait);
 425repeat:
 426        current->state = TASK_UNINTERRUPTIBLE;
 427        if (sb->s_lock) {
 428                schedule();
 429                goto repeat;
 430        }
 431        remove_wait_queue(&sb->s_wait, &wait);
 432        current->state = TASK_RUNNING;
 433}
 434
 435/*
 436 * Note: check the dirty flag before waiting, so we don't
 437 * hold up the sync while mounting a device. (The newly
 438 * mounted device won't need syncing.)
 439 */
 440void sync_supers(kdev_t dev)
 441{
 442        struct super_block * sb;
 443
 444        for (sb = sb_entry(super_blocks.next);
 445             sb != sb_entry(&super_blocks); 
 446             sb = sb_entry(sb->s_list.next)) {
 447                if (!sb->s_dev)
 448                        continue;
 449                if (dev && sb->s_dev != dev)
 450                        continue;
 451                if (!sb->s_dirt)
 452                        continue;
 453                /* N.B. Should lock the superblock while writing */
 454                wait_on_super(sb);
 455                if (!sb->s_dev || !sb->s_dirt)
 456                        continue;
 457                if (dev && (dev != sb->s_dev))
 458                        continue;
 459                if (sb->s_op && sb->s_op->write_super)
 460                        sb->s_op->write_super(sb);
 461        }
 462}
 463
 464struct super_block * get_super(kdev_t dev)
 465{
 466        struct super_block * s;
 467
 468        if (!dev)
 469                return NULL;
 470restart:
 471        s = sb_entry(super_blocks.next);
 472        while (s != sb_entry(&super_blocks))
 473                if (s->s_dev == dev) {
 474                        wait_on_super(s);
 475                        if (s->s_dev == dev)
 476                                return s;
 477                        goto restart;
 478                } else
 479                        s = sb_entry(s->s_list.next);
 480        return NULL;
 481}
 482
 483asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
 484{
 485        struct super_block *s;
 486        struct ustat tmp;
 487        struct statfs sbuf;
 488        mm_segment_t old_fs;
 489        int err = -EINVAL;
 490
 491        lock_kernel();
 492        s = get_super(to_kdev_t(dev));
 493        if (s == NULL)
 494                goto out;
 495        err = -ENOSYS;
 496        if (!(s->s_op->statfs))
 497                goto out;
 498
 499        old_fs = get_fs();
 500        set_fs(get_ds());
 501        s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
 502        set_fs(old_fs);
 503
 504        memset(&tmp,0,sizeof(struct ustat));
 505        tmp.f_tfree = sbuf.f_bfree;
 506        tmp.f_tinode = sbuf.f_ffree;
 507
 508        err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 509out:
 510        unlock_kernel();
 511        return err;
 512}
 513
 514/*
 515 * Find a super_block with no device assigned.
 516 */
 517static struct super_block *get_empty_super(void)
 518{
 519        struct super_block *s;
 520
 521        for (s  = sb_entry(super_blocks.next);
 522             s != sb_entry(&super_blocks); 
 523             s  = sb_entry(s->s_list.next)) {
 524                if (s->s_dev)
 525                        continue;
 526                if (!s->s_lock)
 527                        return s;
 528                printk("VFS: empty superblock %p locked!\n", s);
 529        }
 530        /* Need a new one... */
 531        if (nr_super_blocks >= max_super_blocks)
 532                return NULL;
 533        s = kmalloc(sizeof(struct super_block),  GFP_USER);
 534        if (s) {
 535                nr_super_blocks++;
 536                memset(s, 0, sizeof(struct super_block));
 537                INIT_LIST_HEAD(&s->s_dirty);
 538                list_add (&s->s_list, super_blocks.prev);
 539        }
 540        return s;
 541}
 542
 543static struct super_block * read_super(kdev_t dev,const char *name,int flags,
 544                                       void *data, int silent)
 545{
 546        struct super_block * s;
 547        struct file_system_type *type;
 548
 549        if (!dev)
 550                goto out_null;
 551        check_disk_change(dev);
 552        s = get_super(dev);
 553        if (s)
 554                goto out;
 555
 556        type = get_fs_type(name);
 557        if (!type) {
 558                printk("VFS: on device %s: get_fs_type(%s) failed\n",
 559                       kdevname(dev), name);
 560                goto out;
 561        }
 562        s = get_empty_super();
 563        if (!s)
 564                goto out;
 565        s->s_dev = dev;
 566        s->s_flags = flags;
 567        s->s_dirt = 0;
 568        sema_init(&s->s_vfs_rename_sem,1);
 569        sema_init(&s->s_nfsd_free_path_sem,1);
 570        /* N.B. Should lock superblock now ... */
 571        if (!type->read_super(s, data, silent))
 572                goto out_fail;
 573        s->s_dev = dev; /* N.B. why do this again?? */
 574        s->s_rd_only = 0;
 575        s->s_type = type;
 576out:
 577        return s;
 578
 579        /* N.B. s_dev should be cleared in type->read_super */
 580out_fail:
 581        s->s_dev = 0;
 582out_null:
 583        s = NULL;
 584        goto out;
 585}
 586
 587/*
 588 * Unnamed block devices are dummy devices used by virtual
 589 * filesystems which don't use real block-devices.  -- jrs
 590 */
 591
 592static unsigned long unnamed_dev_in_use[256/(8*sizeof(unsigned long))] = { 0, };
 593
 594kdev_t get_unnamed_dev(void)
 595{
 596        int i;
 597
 598        for (i = 1; i < 256; i++) {
 599                if (!test_and_set_bit(i,unnamed_dev_in_use))
 600                        return MKDEV(UNNAMED_MAJOR, i);
 601        }
 602        return 0;
 603}
 604
 605void put_unnamed_dev(kdev_t dev)
 606{
 607        if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
 608                return;
 609        if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
 610                return;
 611        printk("VFS: put_unnamed_dev: freeing unused device %s\n",
 612                        kdevname(dev));
 613}
 614
 615static int d_umount(struct super_block * sb)
 616{
 617        struct dentry * root = sb->s_root;
 618        struct dentry * covered = root->d_covers;
 619
 620        if (root->d_count != 1)
 621                return -EBUSY;
 622
 623        if (root->d_inode->i_state)
 624                return -EBUSY;
 625
 626        sb->s_root = NULL;
 627
 628        if (covered != root) {
 629                root->d_covers = root;
 630                covered->d_mounts = covered;
 631                dput(covered);
 632        }
 633        dput(root);
 634        return 0;
 635}
 636
 637static void d_mount(struct dentry *covered, struct dentry *dentry)
 638{
 639        if (covered->d_mounts != covered) {
 640                printk("VFS: mount - already mounted\n");
 641                return;
 642        }
 643        covered->d_mounts = dentry;
 644        dentry->d_covers = covered;
 645}
 646
 647static int do_umount(kdev_t dev, int unmount_root, int flags)
 648{
 649        struct super_block * sb;
 650        int retval;
 651        
 652        retval = -ENOENT;
 653        sb = get_super(dev);
 654        if (!sb || !sb->s_root)
 655                goto out;
 656
 657        /*
 658         * Before checking whether the filesystem is still busy,
 659         * make sure the kernel doesn't hold any quota files open
 660         * on the device. If the umount fails, too bad -- there
 661         * are no quotas running any more. Just turn them on again.
 662         */
 663        DQUOT_OFF(dev);
 664        acct_auto_close(dev);
 665
 666        /*
 667         * If we may have to abort operations to get out of this
 668         * mount, and they will themselves hold resources we must
 669         * allow the fs to do things. In the Unix tradition of
 670         * 'Gee thats tricky lets do it in userspace' the umount_begin
 671         * might fail to complete on the first run through as other tasks
 672         * must return, and the like. Thats for the mount program to worry
 673         * about for the moment.
 674         */
 675         
 676        if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
 677                sb->s_op->umount_begin(sb);
 678
 679        /*
 680         * Shrink dcache, then fsync. This guarantees that if the
 681         * filesystem is quiescent at this point, then (a) only the
 682         * root entry should be in use and (b) that root entry is
 683         * clean.
 684         */
 685        shrink_dcache_sb(sb);
 686        fsync_dev(dev);
 687
 688        if (dev==ROOT_DEV && !unmount_root) {
 689                /*
 690                 * Special case for "unmounting" root ...
 691                 * we just try to remount it readonly.
 692                 */
 693                retval = 0;
 694                if (!(sb->s_flags & MS_RDONLY))
 695                        retval = do_remount_sb(sb, MS_RDONLY, 0);
 696                return retval;
 697        }
 698
 699        retval = d_umount(sb);
 700        if (retval)
 701                goto out;
 702
 703        if (sb->s_op) {
 704                if (sb->s_op->write_super && sb->s_dirt)
 705                        sb->s_op->write_super(sb);
 706        }
 707
 708        lock_super(sb);
 709        if (sb->s_op) {
 710                if (sb->s_op->put_super)
 711                        sb->s_op->put_super(sb);
 712        }
 713
 714        /* Forget any remaining inodes */
 715        if (invalidate_inodes(sb)) {
 716                printk("VFS: Busy inodes after unmount. "
 717                        "Self-destruct in 5 seconds.  Have a nice day...\n");
 718        }
 719
 720        sb->s_dev = 0;          /* Free the superblock */
 721        unlock_super(sb);
 722
 723        remove_vfsmnt(dev);
 724out:
 725        return retval;
 726}
 727
 728static int umount_dev(kdev_t dev, int flags)
 729{
 730        int retval;
 731        struct inode * inode = get_empty_inode();
 732
 733        retval = -ENOMEM;
 734        if (!inode)
 735                goto out;
 736
 737        inode->i_rdev = dev;
 738        retval = -ENXIO;
 739        if (MAJOR(dev) >= MAX_BLKDEV)
 740                goto out_iput;
 741
 742        fsync_dev(dev);
 743
 744        down(&mount_sem);
 745
 746        retval = do_umount(dev, 0, flags);
 747        if (!retval) {
 748                fsync_dev(dev);
 749                if (dev != ROOT_DEV) {
 750                        blkdev_release(inode);
 751                        put_unnamed_dev(dev);
 752                }
 753        }
 754
 755        up(&mount_sem);
 756out_iput:
 757        iput(inode);
 758out:
 759        return retval;
 760}
 761
 762/*
 763 * Now umount can handle mount points as well as block devices.
 764 * This is important for filesystems which use unnamed block devices.
 765 *
 766 * There is a little kludge here with the dummy_inode.  The current
 767 * vfs release functions only use the r_dev field in the inode so
 768 * we give them the info they need without using a real inode.
 769 * If any other fields are ever needed by any block device release
 770 * functions, they should be faked here.  -- jrs
 771 *
 772 * We now support a flag for forced unmount like the other 'big iron'
 773 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 774 */
 775
 776asmlinkage int sys_umount(char * name, int flags)
 777{
 778        struct dentry * dentry;
 779        int retval;
 780
 781        if (!capable(CAP_SYS_ADMIN))
 782                return -EPERM;
 783
 784        lock_kernel();
 785        dentry = namei(name);
 786        retval = PTR_ERR(dentry);
 787        if (!IS_ERR(dentry)) {
 788                struct inode * inode = dentry->d_inode;
 789                kdev_t dev = inode->i_rdev;
 790
 791                retval = 0;             
 792                if (S_ISBLK(inode->i_mode)) {
 793                        if (IS_NODEV(inode))
 794                                retval = -EACCES;
 795                } else {
 796                        struct super_block *sb = inode->i_sb;
 797                        retval = -EINVAL;
 798                        if (sb && inode == sb->s_root->d_inode) {
 799                                dev = sb->s_dev;
 800                                retval = 0;
 801                        }
 802                }
 803                dput(dentry);
 804
 805                if (!retval)
 806                        retval = umount_dev(dev, flags);
 807        }
 808        unlock_kernel();
 809        return retval;
 810}
 811
 812/*
 813 *      The 2.0 compatible umount. No flags. 
 814 */
 815 
 816asmlinkage int sys_oldumount(char * name)
 817{
 818        return sys_umount(name,0);
 819}
 820
 821/*
 822 * Check whether we can mount the specified device.
 823 */
 824int fs_may_mount(kdev_t dev)
 825{
 826        struct super_block * sb = get_super(dev);
 827        int busy;
 828
 829        busy = sb && sb->s_root &&
 830               (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
 831        return !busy;
 832}
 833
 834/*
 835 * do_mount() does the actual mounting after sys_mount has done the ugly
 836 * parameter parsing. When enough time has gone by, and everything uses the
 837 * new mount() parameters, sys_mount() can then be cleaned up.
 838 *
 839 * We cannot mount a filesystem if it has active, used, or dirty inodes.
 840 * We also have to flush all inode-data for this device, as the new mount
 841 * might need new info.
 842 *
 843 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
 844 * supplying a leading "!" before the dir_name, allowing "stacks" of
 845 * mounted filesystems. The stacking will only influence any pathname lookups
 846 * _after_ the mount, but open file descriptors or working directories that
 847 * are now covered remain valid. For example, when you overmount /home, any
 848 * process with old cwd /home/joe will continue to use the old versions,
 849 * as long as relative paths are used, but absolute paths like /home/joe/xxx
 850 * will go to the new "top of stack" version. In general, crossing a
 851 * mount point will always go to the top of stack element.
 852 * Anyone using this new feature must know what he/she is doing.
 853 */
 854
 855int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
 856{
 857        struct dentry * dir_d;
 858        struct super_block * sb;
 859        struct vfsmount *vfsmnt;
 860        int error;
 861
 862        error = -EACCES;
 863        if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
 864                goto out;
 865
 866        /*
 867         * Do the lookup first to force automounting.
 868         */
 869        dir_d = namei(dir_name);
 870        error = PTR_ERR(dir_d);
 871        if (IS_ERR(dir_d))
 872                goto out;
 873
 874        down(&mount_sem);
 875        error = -ENOTDIR;
 876        if (!S_ISDIR(dir_d->d_inode->i_mode))
 877                goto dput_and_out;
 878
 879        error = -EBUSY;
 880        if (dir_d->d_covers != dir_d)
 881                goto dput_and_out;
 882
 883        /*
 884         * Note: If the superblock already exists,
 885         * read_super just does a get_super().
 886         */
 887        error = -EINVAL;
 888        sb = read_super(dev, type, flags, data, 0);
 889        if (!sb)
 890                goto dput_and_out;
 891
 892        /*
 893         * We may have slept while reading the super block, 
 894         * so we check afterwards whether it's safe to mount.
 895         */
 896        error = -EBUSY;
 897        if (!fs_may_mount(dev))
 898                goto dput_and_out;
 899
 900        error = -ENOMEM;
 901        vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
 902        if (vfsmnt) {
 903                d_mount(dget(dir_d), sb->s_root);
 904                error = 0;
 905        }
 906
 907dput_and_out:
 908        dput(dir_d);
 909        up(&mount_sem);
 910out:
 911        return error;   
 912}
 913
 914
 915/*
 916 * Alters the mount flags of a mounted file system. Only the mount point
 917 * is used as a reference - file system type and the device are ignored.
 918 * FS-specific mount options can't be altered by remounting.
 919 */
 920
 921static int do_remount_sb(struct super_block *sb, int flags, char *data)
 922{
 923        int retval;
 924        struct vfsmount *vfsmnt;
 925        
 926        /*
 927         * Invalidate the inodes, as some mount options may be changed.
 928         * N.B. If we are changing media, we should check the return
 929         * from invalidate_inodes ... can't allow _any_ open files.
 930         */
 931        invalidate_inodes(sb);
 932
 933        if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
 934                return -EACCES;
 935                /*flags |= MS_RDONLY;*/
 936        /* If we are remounting RDONLY, make sure there are no rw files open */
 937        if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 938                if (!fs_may_remount_ro(sb))
 939                        return -EBUSY;
 940        if (sb->s_op && sb->s_op->remount_fs) {
 941                retval = sb->s_op->remount_fs(sb, &flags, data);
 942                if (retval)
 943                        return retval;
 944        }
 945        sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 946        vfsmnt = lookup_vfsmnt(sb->s_dev);
 947        if (vfsmnt)
 948                vfsmnt->mnt_flags = sb->s_flags;
 949        return 0;
 950}
 951
 952static int do_remount(const char *dir,int flags,char *data)
 953{
 954        struct dentry *dentry;
 955        int retval;
 956
 957        dentry = namei(dir);
 958        retval = PTR_ERR(dentry);
 959        if (!IS_ERR(dentry)) {
 960                struct super_block * sb = dentry->d_inode->i_sb;
 961
 962                retval = -ENODEV;
 963                if (sb) {
 964                        retval = -EINVAL;
 965                        if (dentry == sb->s_root) {
 966                                /*
 967                                 * Shrink the dcache and sync the device.
 968                                 */
 969                                shrink_dcache_sb(sb);
 970                                fsync_dev(sb->s_dev);
 971                                if (flags & MS_RDONLY)
 972                                        acct_auto_close(sb->s_dev);
 973                                retval = do_remount_sb(sb, flags, data);
 974                        }
 975                }
 976                dput(dentry);
 977        }
 978        return retval;
 979}
 980
 981static int copy_mount_options (const void * data, unsigned long *where)
 982{
 983        int i;
 984        unsigned long page;
 985        struct vm_area_struct * vma;
 986
 987        *where = 0;
 988        if (!data)
 989                return 0;
 990
 991        /* If this is the kernel, just trust the pointer. */
 992        if (segment_eq(get_fs(), KERNEL_DS)) {
 993                *where = (unsigned long) data;
 994                return 0;
 995        }
 996
 997        vma = find_vma(current->mm, (unsigned long) data);
 998        if (!vma || (unsigned long) data < vma->vm_start)
 999                return -EFAULT;
1000        if (!(vma->vm_flags & VM_READ))
1001                return -EFAULT;
1002        i = vma->vm_end - (unsigned long) data;
1003        if (PAGE_SIZE <= (unsigned long) i)
1004                i = PAGE_SIZE-1;
1005        if (!(page = __get_free_page(GFP_KERNEL))) {
1006                return -ENOMEM;
1007        }
1008        if (copy_from_user((void *) page,data,i)) {
1009                free_page(page); 
1010                return -EFAULT;
1011        }
1012        *where = page;
1013        return 0;
1014}
1015
1016static void free_mount_page(unsigned long page)
1017{
1018        if (segment_eq(get_fs(), KERNEL_DS))
1019                return;
1020        free_page(page);
1021}
1022
1023/*
1024 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1025 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1026 *
1027 * data is a (void *) that can point to any structure up to
1028 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1029 * information (or be NULL).
1030 *
1031 * NOTE! As old versions of mount() didn't use this setup, the flags
1032 * have to have a special 16-bit magic number in the high word:
1033 * 0xC0ED. If this magic word isn't present, the flags and data info
1034 * aren't used, as the syscall assumes we are talking to an older
1035 * version that didn't understand them.
1036 */
1037asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
1038        unsigned long new_flags, void * data)
1039{
1040        struct file_system_type * fstype;
1041        struct dentry * dentry = NULL;
1042        struct inode * inode = NULL;
1043        kdev_t dev;
1044        int retval = -EPERM;
1045        unsigned long flags = 0;
1046        unsigned long page = 0;
1047        struct file dummy;      /* allows read-write or read-only flag */
1048
1049        lock_kernel();
1050        if (!capable(CAP_SYS_ADMIN))
1051                goto out;
1052        if ((new_flags &
1053             (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1054                retval = copy_mount_options (data, &page);
1055                if (retval < 0)
1056                        goto out;
1057                retval = do_remount(dir_name,
1058                                    new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1059                                    (char *) page);
1060                free_mount_page(page);
1061                goto out;
1062        }
1063
1064        retval = copy_mount_options (type, &page);
1065        if (retval < 0)
1066                goto out;
1067        fstype = get_fs_type((char *) page);
1068        free_mount_page(page);
1069        retval = -ENODEV;
1070        if (!fstype)            
1071                goto out;
1072
1073        memset(&dummy, 0, sizeof(dummy));
1074        if (fstype->fs_flags & FS_REQUIRES_DEV) {
1075                dentry = namei(dev_name);
1076                retval = PTR_ERR(dentry);
1077                if (IS_ERR(dentry))
1078                        goto out;
1079
1080                inode = dentry->d_inode;
1081                retval = -ENOTBLK;
1082                if (!S_ISBLK(inode->i_mode))
1083                        goto dput_and_out;
1084
1085                retval = -EACCES;
1086                if (IS_NODEV(inode))
1087                        goto dput_and_out;
1088
1089                dev = inode->i_rdev;
1090                retval = -ENXIO;
1091                if (MAJOR(dev) >= MAX_BLKDEV)
1092                        goto dput_and_out;
1093
1094                retval = -ENOTBLK;
1095                dummy.f_op = get_blkfops(MAJOR(dev));
1096                if (!dummy.f_op)
1097                        goto dput_and_out;
1098
1099                if (dummy.f_op->open) {
1100                        dummy.f_dentry = dentry;
1101                        dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1102                        retval = dummy.f_op->open(inode, &dummy);
1103                        if (retval)
1104                                goto dput_and_out;
1105                }
1106
1107        } else {
1108                retval = -EMFILE;
1109                if (!(dev = get_unnamed_dev()))
1110                        goto out;
1111        }
1112
1113        page = 0;
1114        if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1115                flags = new_flags & ~MS_MGC_MSK;
1116                retval = copy_mount_options(data, &page);
1117                if (retval < 0)
1118                        goto clean_up;
1119        }
1120        retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1121                                (void *) page);
1122        free_mount_page(page);
1123        if (retval)
1124                goto clean_up;
1125
1126dput_and_out:
1127        dput(dentry);
1128out:
1129        unlock_kernel();
1130        return retval;
1131
1132clean_up:
1133        if (dummy.f_op) {
1134                if (dummy.f_op->release)
1135                        dummy.f_op->release(inode, NULL);
1136        } else
1137                put_unnamed_dev(dev);
1138        goto dput_and_out;
1139}
1140
1141void __init mount_root(void)
1142{
1143        struct file_system_type * fs_type;
1144        struct super_block * sb;
1145        struct vfsmount *vfsmnt;
1146        struct inode * d_inode = NULL;
1147        struct file filp;
1148        int retval;
1149
1150#ifdef CONFIG_ROOT_NFS
1151        if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1152                ROOT_DEV = 0;
1153                if ((fs_type = get_fs_type("nfs"))) {
1154                        sb = get_empty_super(); /* "can't fail" */
1155                        sb->s_dev = get_unnamed_dev();
1156                        sb->s_flags = root_mountflags;
1157                        sema_init(&sb->s_vfs_rename_sem,1);
1158                        sema_init(&sb->s_nfsd_free_path_sem,1);
1159                        vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1160                        if (vfsmnt) {
1161                                if (nfs_root_mount(sb) >= 0) {
1162                                        sb->s_dirt = 0;
1163                                        sb->s_type = fs_type;
1164                                        current->fs->root = dget(sb->s_root);
1165                                        current->fs->pwd = dget(sb->s_root);
1166                                        ROOT_DEV = sb->s_dev;
1167                                        printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1168                                        return;
1169                                }
1170                                remove_vfsmnt(sb->s_dev);
1171                        }
1172                        put_unnamed_dev(sb->s_dev);
1173                        sb->s_dev = 0;
1174                }
1175                if (!ROOT_DEV) {
1176                        printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1177                        ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1178                }
1179        }
1180#endif
1181
1182#ifdef CONFIG_BLK_DEV_FD
1183        if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1184#ifdef CONFIG_BLK_DEV_RAM       
1185                extern int rd_doload;
1186#endif          
1187                floppy_eject();
1188#ifndef CONFIG_BLK_DEV_RAM
1189                printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1190#else
1191                /* rd_doload is 2 for a dual initrd/ramload setup */
1192                if(rd_doload==2)
1193                        rd_load_secondary();
1194                else
1195#endif          
1196                {
1197                        printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1198                        wait_for_keypress();
1199                }
1200        }
1201#endif
1202
1203        memset(&filp, 0, sizeof(filp));
1204        d_inode = get_empty_inode();
1205        d_inode->i_rdev = ROOT_DEV;
1206        filp.f_dentry = NULL;
1207        if ( root_mountflags & MS_RDONLY)
1208                filp.f_mode = 1; /* read only */
1209        else
1210                filp.f_mode = 3; /* read write */
1211        retval = blkdev_open(d_inode, &filp);
1212        if (retval == -EROFS) {
1213                root_mountflags |= MS_RDONLY;
1214                filp.f_mode = 1;
1215                retval = blkdev_open(d_inode, &filp);
1216        }
1217        iput(d_inode);
1218        if (retval)
1219                /*
1220                 * Allow the user to distinguish between failed open
1221                 * and bad superblock on root device.
1222                 */
1223                printk("VFS: Cannot open root device %s\n",
1224                       kdevname(ROOT_DEV));
1225        else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1226                if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1227                        continue;
1228                sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1229                if (sb) {
1230                        sb->s_flags = root_mountflags;
1231                        current->fs->root = dget(sb->s_root);
1232                        current->fs->pwd = dget(sb->s_root);
1233                        printk ("VFS: Mounted root (%s filesystem)%s.\n",
1234                                fs_type->name,
1235                                (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1236                        vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1237                        if (vfsmnt)
1238                                return;
1239                        panic("VFS: add_vfsmnt failed for root fs");
1240                }
1241        }
1242        panic("VFS: Unable to mount root fs on %s",
1243                kdevname(ROOT_DEV));
1244}
1245
1246
1247#ifdef CONFIG_BLK_DEV_INITRD
1248
1249int __init change_root(kdev_t new_root_dev,const char *put_old)
1250{
1251        kdev_t old_root_dev;
1252        struct vfsmount *vfsmnt;
1253        struct dentry *old_root,*old_pwd,*dir_d = NULL;
1254        int error;
1255
1256        old_root = current->fs->root;
1257        old_pwd = current->fs->pwd;
1258        old_root_dev = ROOT_DEV;
1259        if (!fs_may_mount(new_root_dev)) {
1260                printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1261                return -EBUSY;
1262        }
1263        ROOT_DEV = new_root_dev;
1264        mount_root();
1265        dput(old_root);
1266        dput(old_pwd);
1267#if 1
1268        shrink_dcache();
1269        printk("change_root: old root has d_count=%d\n", old_root->d_count);
1270#endif
1271        /*
1272         * Get the new mount directory
1273         */
1274        dir_d = lookup_dentry(put_old, NULL, 1);
1275        if (IS_ERR(dir_d)) {
1276                error = PTR_ERR(dir_d);
1277        } else if (!dir_d->d_inode) {
1278                dput(dir_d);
1279                error = -ENOENT;
1280        } else {
1281                error = 0;
1282        }
1283        if (!error && dir_d->d_covers != dir_d) {
1284                dput(dir_d);
1285                error = -EBUSY;
1286        }
1287        if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1288                dput(dir_d);
1289                error = -ENOTDIR;
1290        }
1291        if (error) {
1292                int umount_error;
1293
1294                printk(KERN_NOTICE "Trying to unmount old root ... ");
1295                umount_error = do_umount(old_root_dev,1, 0);
1296                if (!umount_error) {
1297                        printk("okay\n");
1298                        /* special: the old device driver is going to be
1299                           a ramdisk and the point of this call is to free its
1300                           protected memory (even if dirty). */
1301                        destroy_buffers(old_root_dev);
1302                        return 0;
1303                }
1304                printk(KERN_ERR "error %d\n",umount_error);
1305                return error;
1306        }
1307        remove_vfsmnt(old_root_dev);
1308        vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1309        if (vfsmnt) {
1310                d_mount(dir_d,old_root);
1311                return 0;
1312        }
1313        printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1314        return -ENOMEM;
1315}
1316
1317#endif
1318
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.