1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/slab.h>
13#include <linux/sched.h>
14#include <linux/smp_lock.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/quotaops.h>
18#include <linux/acct.h>
19#include <linux/capability.h>
20#include <linux/module.h>
21#include <linux/sysfs.h>
22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h>
24#include <linux/namei.h>
25#include <linux/security.h>
26#include <linux/mount.h>
27#include <linux/ramfs.h>
28#include <asm/uaccess.h>
29#include <asm/unistd.h>
30#include "pnode.h"
31
32
33__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
34
35static int event;
36
37static struct list_head *mount_hashtable __read_mostly;
38static int hash_mask __read_mostly, hash_bits __read_mostly;
39static struct kmem_cache *mnt_cache __read_mostly;
40static struct rw_semaphore namespace_sem;
41
42
43decl_subsys(fs, NULL, NULL);
44EXPORT_SYMBOL_GPL(fs_subsys);
45
46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
47{
48 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
49 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
50 tmp = tmp + (tmp >> hash_bits);
51 return tmp & hash_mask;
52}
53
54struct vfsmount *alloc_vfsmnt(const char *name)
55{
56 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
57 if (mnt) {
58 memset(mnt, 0, sizeof(struct vfsmount));
59 atomic_set(&mnt->mnt_count, 1);
60 INIT_LIST_HEAD(&mnt->mnt_hash);
61 INIT_LIST_HEAD(&mnt->mnt_child);
62 INIT_LIST_HEAD(&mnt->mnt_mounts);
63 INIT_LIST_HEAD(&mnt->mnt_list);
64 INIT_LIST_HEAD(&mnt->mnt_expire);
65 INIT_LIST_HEAD(&mnt->mnt_share);
66 INIT_LIST_HEAD(&mnt->mnt_slave_list);
67 INIT_LIST_HEAD(&mnt->mnt_slave);
68 if (name) {
69 int size = strlen(name) + 1;
70 char *newname = kmalloc(size, GFP_KERNEL);
71 if (newname) {
72 memcpy(newname, name, size);
73 mnt->mnt_devname = newname;
74 }
75 }
76 }
77 return mnt;
78}
79
80int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
81{
82 mnt->mnt_sb = sb;
83 mnt->mnt_root = dget(sb->s_root);
84 return 0;
85}
86
87EXPORT_SYMBOL(simple_set_mnt);
88
89void free_vfsmnt(struct vfsmount *mnt)
90{
91 kfree(mnt->mnt_devname);
92 kmem_cache_free(mnt_cache, mnt);
93}
94
95
96
97
98
99struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
100 int dir)
101{
102 struct list_head *head = mount_hashtable + hash(mnt, dentry);
103 struct list_head *tmp = head;
104 struct vfsmount *p, *found = NULL;
105
106 for (;;) {
107 tmp = dir ? tmp->next : tmp->prev;
108 p = NULL;
109 if (tmp == head)
110 break;
111 p = list_entry(tmp, struct vfsmount, mnt_hash);
112 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
113 found = p;
114 break;
115 }
116 }
117 return found;
118}
119
120
121
122
123
124struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
125{
126 struct vfsmount *child_mnt;
127 spin_lock(&vfsmount_lock);
128 if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
129 mntget(child_mnt);
130 spin_unlock(&vfsmount_lock);
131 return child_mnt;
132}
133
134static inline int check_mnt(struct vfsmount *mnt)
135{
136 return mnt->mnt_ns == current->nsproxy->mnt_ns;
137}
138
139static void touch_mnt_namespace(struct mnt_namespace *ns)
140{
141 if (ns) {
142 ns->event = ++event;
143 wake_up_interruptible(&ns->poll);
144 }
145}
146
147static void __touch_mnt_namespace(struct mnt_namespace *ns)
148{
149 if (ns && ns->event != event) {
150 ns->event = event;
151 wake_up_interruptible(&ns->poll);
152 }
153}
154
155static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
156{
157 old_nd->dentry = mnt->mnt_mountpoint;
158 old_nd->mnt = mnt->mnt_parent;
159 mnt->mnt_parent = mnt;
160 mnt->mnt_mountpoint = mnt->mnt_root;
161 list_del_init(&mnt->mnt_child);
162 list_del_init(&mnt->mnt_hash);
163 old_nd->dentry->d_mounted--;
164}
165
166void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
167 struct vfsmount *child_mnt)
168{
169 child_mnt->mnt_parent = mntget(mnt);
170 child_mnt->mnt_mountpoint = dget(dentry);
171 dentry->d_mounted++;
172}
173
174static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
175{
176 mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
177 list_add_tail(&mnt->mnt_hash, mount_hashtable +
178 hash(nd->mnt, nd->dentry));
179 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
180}
181
182
183
184
185static void commit_tree(struct vfsmount *mnt)
186{
187 struct vfsmount *parent = mnt->mnt_parent;
188 struct vfsmount *m;
189 LIST_HEAD(head);
190 struct mnt_namespace *n = parent->mnt_ns;
191
192 BUG_ON(parent == mnt);
193
194 list_add_tail(&head, &mnt->mnt_list);
195 list_for_each_entry(m, &head, mnt_list)
196 m->mnt_ns = n;
197 list_splice(&head, n->list.prev);
198
199 list_add_tail(&mnt->mnt_hash, mount_hashtable +
200 hash(parent, mnt->mnt_mountpoint));
201 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
202 touch_mnt_namespace(n);
203}
204
205static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
206{
207 struct list_head *next = p->mnt_mounts.next;
208 if (next == &p->mnt_mounts) {
209 while (1) {
210 if (p == root)
211 return NULL;
212 next = p->mnt_child.next;
213 if (next != &p->mnt_parent->mnt_mounts)
214 break;
215 p = p->mnt_parent;
216 }
217 }
218 return list_entry(next, struct vfsmount, mnt_child);
219}
220
221static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
222{
223 struct list_head *prev = p->mnt_mounts.prev;
224 while (prev != &p->mnt_mounts) {
225 p = list_entry(prev, struct vfsmount, mnt_child);
226 prev = p->mnt_mounts.prev;
227 }
228 return p;
229}
230
231static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
232 int flag)
233{
234 struct super_block *sb = old->mnt_sb;
235 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
236
237 if (mnt) {
238 mnt->mnt_flags = old->mnt_flags;
239 atomic_inc(&sb->s_active);
240 mnt->mnt_sb = sb;
241 mnt->mnt_root = dget(root);
242 mnt->mnt_mountpoint = mnt->mnt_root;
243 mnt->mnt_parent = mnt;
244
245 if (flag & CL_SLAVE) {
246 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
247 mnt->mnt_master = old;
248 CLEAR_MNT_SHARED(mnt);
249 } else {
250 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
251 list_add(&mnt->mnt_share, &old->mnt_share);
252 if (IS_MNT_SLAVE(old))
253 list_add(&mnt->mnt_slave, &old->mnt_slave);
254 mnt->mnt_master = old->mnt_master;
255 }
256 if (flag & CL_MAKE_SHARED)
257 set_mnt_shared(mnt);
258
259
260
261 if (flag & CL_EXPIRE) {
262 spin_lock(&vfsmount_lock);
263 if (!list_empty(&old->mnt_expire))
264 list_add(&mnt->mnt_expire, &old->mnt_expire);
265 spin_unlock(&vfsmount_lock);
266 }
267 }
268 return mnt;
269}
270
271static inline void __mntput(struct vfsmount *mnt)
272{
273 struct super_block *sb = mnt->mnt_sb;
274 dput(mnt->mnt_root);
275 free_vfsmnt(mnt);
276 deactivate_super(sb);
277}
278
279void mntput_no_expire(struct vfsmount *mnt)
280{
281repeat:
282 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
283 if (likely(!mnt->mnt_pinned)) {
284 spin_unlock(&vfsmount_lock);
285 __mntput(mnt);
286 return;
287 }
288 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
289 mnt->mnt_pinned = 0;
290 spin_unlock(&vfsmount_lock);
291 acct_auto_close_mnt(mnt);
292 security_sb_umount_close(mnt);
293 goto repeat;
294 }
295}
296
297EXPORT_SYMBOL(mntput_no_expire);
298
299void mnt_pin(struct vfsmount *mnt)
300{
301 spin_lock(&vfsmount_lock);
302 mnt->mnt_pinned++;
303 spin_unlock(&vfsmount_lock);
304}
305
306EXPORT_SYMBOL(mnt_pin);
307
308void mnt_unpin(struct vfsmount *mnt)
309{
310 spin_lock(&vfsmount_lock);
311 if (mnt->mnt_pinned) {
312 atomic_inc(&mnt->mnt_count);
313 mnt->mnt_pinned--;
314 }
315 spin_unlock(&vfsmount_lock);
316}
317
318EXPORT_SYMBOL(mnt_unpin);
319
320
321static void *m_start(struct seq_file *m, loff_t *pos)
322{
323 struct mnt_namespace *n = m->private;
324 struct list_head *p;
325 loff_t l = *pos;
326
327 down_read(&namespace_sem);
328 list_for_each(p, &n->list)
329 if (!l--)
330 return list_entry(p, struct vfsmount, mnt_list);
331 return NULL;
332}
333
334static void *m_next(struct seq_file *m, void *v, loff_t *pos)
335{
336 struct mnt_namespace *n = m->private;
337 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
338 (*pos)++;
339 return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
340}
341
342static void m_stop(struct seq_file *m, void *v)
343{
344 up_read(&namespace_sem);
345}
346
/*
 * Emit @s into @m through seq_escape(), escaping space, tab, newline and
 * backslash.
 */
static inline void mangle(struct seq_file *m, const char *s)
{
	const char *escaped = " \t\n\\";

	seq_escape(m, s, escaped);
}
351
352static int show_vfsmnt(struct seq_file *m, void *v)
353{
354 struct vfsmount *mnt = v;
355 int err = 0;
356 static struct proc_fs_info {
357 int flag;
358 char *str;
359 } fs_info[] = {
360 { MS_SYNCHRONOUS, ",sync" },
361 { MS_DIRSYNC, ",dirsync" },
362 { MS_MANDLOCK, ",mand" },
363 { 0, NULL }
364 };
365 static struct proc_fs_info mnt_info[] = {
366 { MNT_NOSUID, ",nosuid" },
367 { MNT_NODEV, ",nodev" },
368 { MNT_NOEXEC, ",noexec" },
369 { MNT_NOATIME, ",noatime" },
370 { MNT_NODIRATIME, ",nodiratime" },
371 { MNT_RELATIME, ",relatime" },
372 { 0, NULL }
373 };
374 struct proc_fs_info *fs_infop;
375
376 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
377 seq_putc(m, ' ');
378 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
379 seq_putc(m, ' ');
380 mangle(m, mnt->mnt_sb->s_type->name);
381 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
382 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
383 if (mnt->mnt_sb->s_flags & fs_infop->flag)
384 seq_puts(m, fs_infop->str);
385 }
386 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
387 if (mnt->mnt_flags & fs_infop->flag)
388 seq_puts(m, fs_infop->str);
389 }
390 if (mnt->mnt_sb->s_op->show_options)
391 err = mnt->mnt_sb->s_op->show_options(m, mnt);
392 seq_puts(m, " 0 0\n");
393 return err;
394}
395
396struct seq_operations mounts_op = {
397 .start = m_start,
398 .next = m_next,
399 .stop = m_stop,
400 .show = show_vfsmnt
401};
402
403static int show_vfsstat(struct seq_file *m, void *v)
404{
405 struct vfsmount *mnt = v;
406 int err = 0;
407
408
409 if (mnt->mnt_devname) {
410 seq_puts(m, "device ");
411 mangle(m, mnt->mnt_devname);
412 } else
413 seq_puts(m, "no device");
414
415
416 seq_puts(m, " mounted on ");
417 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
418 seq_putc(m, ' ');
419
420
421 seq_puts(m, "with fstype ");
422 mangle(m, mnt->mnt_sb->s_type->name);
423
424
425 if (mnt->mnt_sb->s_op->show_stats) {
426 seq_putc(m, ' ');
427 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
428 }
429
430 seq_putc(m, '\n');
431 return err;
432}
433
434struct seq_operations mountstats_op = {
435 .start = m_start,
436 .next = m_next,
437 .stop = m_stop,
438 .show = show_vfsstat,
439};
440
441
442
443
444
445
446
447
448
449int may_umount_tree(struct vfsmount *mnt)
450{
451 int actual_refs = 0;
452 int minimum_refs = 0;
453 struct vfsmount *p;
454
455 spin_lock(&vfsmount_lock);
456 for (p = mnt; p; p = next_mnt(p, mnt)) {
457 actual_refs += atomic_read(&p->mnt_count);
458 minimum_refs += 2;
459 }
460 spin_unlock(&vfsmount_lock);
461
462 if (actual_refs > minimum_refs)
463 return 0;
464
465 return 1;
466}
467
468EXPORT_SYMBOL(may_umount_tree);
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483int may_umount(struct vfsmount *mnt)
484{
485 int ret = 1;
486 spin_lock(&vfsmount_lock);
487 if (propagate_mount_busy(mnt, 2))
488 ret = 0;
489 spin_unlock(&vfsmount_lock);
490 return ret;
491}
492
493EXPORT_SYMBOL(may_umount);
494
495void release_mounts(struct list_head *head)
496{
497 struct vfsmount *mnt;
498 while (!list_empty(head)) {
499 mnt = list_entry(head->next, struct vfsmount, mnt_hash);
500 list_del_init(&mnt->mnt_hash);
501 if (mnt->mnt_parent != mnt) {
502 struct dentry *dentry;
503 struct vfsmount *m;
504 spin_lock(&vfsmount_lock);
505 dentry = mnt->mnt_mountpoint;
506 m = mnt->mnt_parent;
507 mnt->mnt_mountpoint = mnt->mnt_root;
508 mnt->mnt_parent = mnt;
509 spin_unlock(&vfsmount_lock);
510 dput(dentry);
511 mntput(m);
512 }
513 mntput(mnt);
514 }
515}
516
517void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
518{
519 struct vfsmount *p;
520
521 for (p = mnt; p; p = next_mnt(p, mnt))
522 list_move(&p->mnt_hash, kill);
523
524 if (propagate)
525 propagate_umount(kill);
526
527 list_for_each_entry(p, kill, mnt_hash) {
528 list_del_init(&p->mnt_expire);
529 list_del_init(&p->mnt_list);
530 __touch_mnt_namespace(p->mnt_ns);
531 p->mnt_ns = NULL;
532 list_del_init(&p->mnt_child);
533 if (p->mnt_parent != p)
534 p->mnt_mountpoint->d_mounted--;
535 change_mnt_propagation(p, MS_PRIVATE);
536 }
537}
538
539static int do_umount(struct vfsmount *mnt, int flags)
540{
541 struct super_block *sb = mnt->mnt_sb;
542 int retval;
543 LIST_HEAD(umount_list);
544
545 retval = security_sb_umount(mnt, flags);
546 if (retval)
547 return retval;
548
549
550
551
552
553
554
555 if (flags & MNT_EXPIRE) {
556 if (mnt == current->fs->rootmnt ||
557 flags & (MNT_FORCE | MNT_DETACH))
558 return -EINVAL;
559
560 if (atomic_read(&mnt->mnt_count) != 2)
561 return -EBUSY;
562
563 if (!xchg(&mnt->mnt_expiry_mark, 1))
564 return -EAGAIN;
565 }
566
567
568
569
570
571
572
573
574
575
576
577 lock_kernel();
578 if (sb->s_op->umount_begin)
579 sb->s_op->umount_begin(mnt, flags);
580 unlock_kernel();
581
582
583
584
585
586
587
588
589
590
591 if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
592
593
594
595
596 down_write(&sb->s_umount);
597 if (!(sb->s_flags & MS_RDONLY)) {
598 lock_kernel();
599 DQUOT_OFF(sb);
600 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
601 unlock_kernel();
602 }
603 up_write(&sb->s_umount);
604 return retval;
605 }
606
607 down_write(&namespace_sem);
608 spin_lock(&vfsmount_lock);
609 event++;
610
611 retval = -EBUSY;
612 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
613 if (!list_empty(&mnt->mnt_list))
614 umount_tree(mnt, 1, &umount_list);
615 retval = 0;
616 }
617 spin_unlock(&vfsmount_lock);
618 if (retval)
619 security_sb_umount_busy(mnt);
620 up_write(&namespace_sem);
621 release_mounts(&umount_list);
622 return retval;
623}
624
625
626
627
628
629
630
631
632
633asmlinkage long sys_umount(char __user * name, int flags)
634{
635 struct nameidata nd;
636 int retval;
637
638 retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
639 if (retval)
640 goto out;
641 retval = -EINVAL;
642 if (nd.dentry != nd.mnt->mnt_root)
643 goto dput_and_out;
644 if (!check_mnt(nd.mnt))
645 goto dput_and_out;
646
647 retval = -EPERM;
648 if (!capable(CAP_SYS_ADMIN))
649 goto dput_and_out;
650
651 retval = do_umount(nd.mnt, flags);
652dput_and_out:
653 path_release_on_umount(&nd);
654out:
655 return retval;
656}
657
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/* One-argument umount system call: sys_umount() with no flags. */
asmlinkage long sys_oldumount(char __user * name)
{
	const int no_flags = 0;

	return sys_umount(name, no_flags);
}

#endif
669
670static int mount_is_safe(struct nameidata *nd)
671{
672 if (capable(CAP_SYS_ADMIN))
673 return 0;
674 return -EPERM;
675#ifdef notyet
676 if (S_ISLNK(nd->dentry->d_inode->i_mode))
677 return -EPERM;
678 if (nd->dentry->d_inode->i_mode & S_ISVTX) {
679 if (current->uid != nd->dentry->d_inode->i_uid)
680 return -EPERM;
681 }
682 if (vfs_permission(nd, MAY_WRITE))
683 return -EPERM;
684 return 0;
685#endif
686}
687
688static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
689{
690 while (1) {
691 if (d == dentry)
692 return 1;
693 if (d == NULL || d == d->d_parent)
694 return 0;
695 d = d->d_parent;
696 }
697}
698
699struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
700 int flag)
701{
702 struct vfsmount *res, *p, *q, *r, *s;
703 struct nameidata nd;
704
705 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
706 return NULL;
707
708 res = q = clone_mnt(mnt, dentry, flag);
709 if (!q)
710 goto Enomem;
711 q->mnt_mountpoint = mnt->mnt_mountpoint;
712
713 p = mnt;
714 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
715 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
716 continue;
717
718 for (s = r; s; s = next_mnt(s, r)) {
719 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
720 s = skip_mnt_tree(s);
721 continue;
722 }
723 while (p != s->mnt_parent) {
724 p = p->mnt_parent;
725 q = q->mnt_parent;
726 }
727 p = s;
728 nd.mnt = q;
729 nd.dentry = p->mnt_mountpoint;
730 q = clone_mnt(p, p->mnt_root, flag);
731 if (!q)
732 goto Enomem;
733 spin_lock(&vfsmount_lock);
734 list_add_tail(&q->mnt_list, &res->mnt_list);
735 attach_mnt(q, &nd);
736 spin_unlock(&vfsmount_lock);
737 }
738 }
739 return res;
740Enomem:
741 if (res) {
742 LIST_HEAD(umount_list);
743 spin_lock(&vfsmount_lock);
744 umount_tree(res, 0, &umount_list);
745 spin_unlock(&vfsmount_lock);
746 release_mounts(&umount_list);
747 }
748 return NULL;
749}
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814static int attach_recursive_mnt(struct vfsmount *source_mnt,
815 struct nameidata *nd, struct nameidata *parent_nd)
816{
817 LIST_HEAD(tree_list);
818 struct vfsmount *dest_mnt = nd->mnt;
819 struct dentry *dest_dentry = nd->dentry;
820 struct vfsmount *child, *p;
821
822 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
823 return -EINVAL;
824
825 if (IS_MNT_SHARED(dest_mnt)) {
826 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
827 set_mnt_shared(p);
828 }
829
830 spin_lock(&vfsmount_lock);
831 if (parent_nd) {
832 detach_mnt(source_mnt, parent_nd);
833 attach_mnt(source_mnt, nd);
834 touch_mnt_namespace(current->nsproxy->mnt_ns);
835 } else {
836 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
837 commit_tree(source_mnt);
838 }
839
840 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
841 list_del_init(&child->mnt_hash);
842 commit_tree(child);
843 }
844 spin_unlock(&vfsmount_lock);
845 return 0;
846}
847
848static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
849{
850 int err;
851 if (mnt->mnt_sb->s_flags & MS_NOUSER)
852 return -EINVAL;
853
854 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
855 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
856 return -ENOTDIR;
857
858 err = -ENOENT;
859 mutex_lock(&nd->dentry->d_inode->i_mutex);
860 if (IS_DEADDIR(nd->dentry->d_inode))
861 goto out_unlock;
862
863 err = security_sb_check_sb(mnt, nd);
864 if (err)
865 goto out_unlock;
866
867 err = -ENOENT;
868 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
869 err = attach_recursive_mnt(mnt, nd, NULL);
870out_unlock:
871 mutex_unlock(&nd->dentry->d_inode->i_mutex);
872 if (!err)
873 security_sb_post_addmount(mnt, nd);
874 return err;
875}
876
877
878
879
880static int do_change_type(struct nameidata *nd, int flag)
881{
882 struct vfsmount *m, *mnt = nd->mnt;
883 int recurse = flag & MS_REC;
884 int type = flag & ~MS_REC;
885
886 if (nd->dentry != nd->mnt->mnt_root)
887 return -EINVAL;
888
889 down_write(&namespace_sem);
890 spin_lock(&vfsmount_lock);
891 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
892 change_mnt_propagation(m, type);
893 spin_unlock(&vfsmount_lock);
894 up_write(&namespace_sem);
895 return 0;
896}
897
898
899
900
901static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
902{
903 struct nameidata old_nd;
904 struct vfsmount *mnt = NULL;
905 int err = mount_is_safe(nd);
906 if (err)
907 return err;
908 if (!old_name || !*old_name)
909 return -EINVAL;
910 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
911 if (err)
912 return err;
913
914 down_write(&namespace_sem);
915 err = -EINVAL;
916 if (IS_MNT_UNBINDABLE(old_nd.mnt))
917 goto out;
918
919 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
920 goto out;
921
922 err = -ENOMEM;
923 if (recurse)
924 mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
925 else
926 mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
927
928 if (!mnt)
929 goto out;
930
931 err = graft_tree(mnt, nd);
932 if (err) {
933 LIST_HEAD(umount_list);
934 spin_lock(&vfsmount_lock);
935 umount_tree(mnt, 0, &umount_list);
936 spin_unlock(&vfsmount_lock);
937 release_mounts(&umount_list);
938 }
939
940out:
941 up_write(&namespace_sem);
942 path_release(&old_nd);
943 return err;
944}
945
946
947
948
949
950
951static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
952 void *data)
953{
954 int err;
955 struct super_block *sb = nd->mnt->mnt_sb;
956
957 if (!capable(CAP_SYS_ADMIN))
958 return -EPERM;
959
960 if (!check_mnt(nd->mnt))
961 return -EINVAL;
962
963 if (nd->dentry != nd->mnt->mnt_root)
964 return -EINVAL;
965
966 down_write(&sb->s_umount);
967 err = do_remount_sb(sb, flags, data, 0);
968 if (!err)
969 nd->mnt->mnt_flags = mnt_flags;
970 up_write(&sb->s_umount);
971 if (!err)
972 security_sb_post_remount(nd->mnt, flags, data);
973 return err;
974}
975
/* Returns 1 if @mnt or any mount below it is unbindable, 0 otherwise. */
static inline int tree_contains_unbindable(struct vfsmount *mnt)
{
	struct vfsmount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
985
986static int do_move_mount(struct nameidata *nd, char *old_name)
987{
988 struct nameidata old_nd, parent_nd;
989 struct vfsmount *p;
990 int err = 0;
991 if (!capable(CAP_SYS_ADMIN))
992 return -EPERM;
993 if (!old_name || !*old_name)
994 return -EINVAL;
995 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
996 if (err)
997 return err;
998
999 down_write(&namespace_sem);
1000 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
1001 ;
1002 err = -EINVAL;
1003 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
1004 goto out;
1005
1006 err = -ENOENT;
1007 mutex_lock(&nd->dentry->d_inode->i_mutex);
1008 if (IS_DEADDIR(nd->dentry->d_inode))
1009 goto out1;
1010
1011 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
1012 goto out1;
1013
1014 err = -EINVAL;
1015 if (old_nd.dentry != old_nd.mnt->mnt_root)
1016 goto out1;
1017
1018 if (old_nd.mnt == old_nd.mnt->mnt_parent)
1019 goto out1;
1020
1021 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
1022 S_ISDIR(old_nd.dentry->d_inode->i_mode))
1023 goto out1;
1024
1025
1026
1027 if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
1028 goto out1;
1029
1030
1031
1032
1033 if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
1034 goto out1;
1035 err = -ELOOP;
1036 for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
1037 if (p == old_nd.mnt)
1038 goto out1;
1039
1040 if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
1041 goto out1;
1042
1043 spin_lock(&vfsmount_lock);
1044
1045
1046 list_del_init(&old_nd.mnt->mnt_expire);
1047 spin_unlock(&vfsmount_lock);
1048out1:
1049 mutex_unlock(&nd->dentry->d_inode->i_mutex);
1050out:
1051 up_write(&namespace_sem);
1052 if (!err)
1053 path_release(&parent_nd);
1054 path_release(&old_nd);
1055 return err;
1056}
1057
1058
1059
1060
1061
1062static int do_new_mount(struct nameidata *nd, char *type, int flags,
1063 int mnt_flags, char *name, void *data)
1064{
1065 struct vfsmount *mnt;
1066
1067 if (!type || !memchr(type, 0, PAGE_SIZE))
1068 return -EINVAL;
1069
1070
1071 if (!capable(CAP_SYS_ADMIN))
1072 return -EPERM;
1073
1074 mnt = do_kern_mount(type, flags, name, data);
1075 if (IS_ERR(mnt))
1076 return PTR_ERR(mnt);
1077
1078 return do_add_mount(mnt, nd, mnt_flags, NULL);
1079}
1080
1081
1082
1083
1084
1085int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1086 int mnt_flags, struct list_head *fslist)
1087{
1088 int err;
1089
1090 down_write(&namespace_sem);
1091
1092 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
1093 ;
1094 err = -EINVAL;
1095 if (!check_mnt(nd->mnt))
1096 goto unlock;
1097
1098
1099 err = -EBUSY;
1100 if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
1101 nd->mnt->mnt_root == nd->dentry)
1102 goto unlock;
1103
1104 err = -EINVAL;
1105 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
1106 goto unlock;
1107
1108 newmnt->mnt_flags = mnt_flags;
1109 if ((err = graft_tree(newmnt, nd)))
1110 goto unlock;
1111
1112 if (fslist) {
1113
1114 spin_lock(&vfsmount_lock);
1115 list_add_tail(&newmnt->mnt_expire, fslist);
1116 spin_unlock(&vfsmount_lock);
1117 }
1118 up_write(&namespace_sem);
1119 return 0;
1120
1121unlock:
1122 up_write(&namespace_sem);
1123 mntput(newmnt);
1124 return err;
1125}
1126
1127EXPORT_SYMBOL_GPL(do_add_mount);
1128
1129static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1130 struct list_head *umounts)
1131{
1132 spin_lock(&vfsmount_lock);
1133
1134
1135
1136
1137
1138 if (mnt->mnt_parent == mnt) {
1139 spin_unlock(&vfsmount_lock);
1140 return;
1141 }
1142
1143
1144
1145
1146
1147 if (!propagate_mount_busy(mnt, 2)) {
1148
1149 touch_mnt_namespace(mnt->mnt_ns);
1150 list_del_init(&mnt->mnt_list);
1151 mnt->mnt_ns = NULL;
1152 umount_tree(mnt, 1, umounts);
1153 spin_unlock(&vfsmount_lock);
1154 } else {
1155
1156
1157
1158
1159 list_add_tail(&mnt->mnt_expire, mounts);
1160 spin_unlock(&vfsmount_lock);
1161 }
1162}
1163
1164
1165
1166
1167
1168
1169
1170static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
1171{
1172 struct mnt_namespace *ns;
1173 struct vfsmount *mnt;
1174
1175 while (!list_empty(graveyard)) {
1176 LIST_HEAD(umounts);
1177 mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire);
1178 list_del_init(&mnt->mnt_expire);
1179
1180
1181
1182 ns = mnt->mnt_ns;
1183 if (!ns || !ns->root)
1184 continue;
1185 get_mnt_ns(ns);
1186
1187 spin_unlock(&vfsmount_lock);
1188 down_write(&namespace_sem);
1189 expire_mount(mnt, mounts, &umounts);
1190 up_write(&namespace_sem);
1191 release_mounts(&umounts);
1192 mntput(mnt);
1193 put_mnt_ns(ns);
1194 spin_lock(&vfsmount_lock);
1195 }
1196}
1197
1198
1199
1200
1201
1202
1203void mark_mounts_for_expiry(struct list_head *mounts)
1204{
1205 struct vfsmount *mnt, *next;
1206 LIST_HEAD(graveyard);
1207
1208 if (list_empty(mounts))
1209 return;
1210
1211 spin_lock(&vfsmount_lock);
1212
1213
1214
1215
1216
1217
1218
1219 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1220 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
1221 atomic_read(&mnt->mnt_count) != 1)
1222 continue;
1223
1224 mntget(mnt);
1225 list_move(&mnt->mnt_expire, &graveyard);
1226 }
1227
1228 expire_mount_list(&graveyard, mounts);
1229
1230 spin_unlock(&vfsmount_lock);
1231}
1232
1233EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
1234
1235
1236
1237
1238
1239
1240
1241static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
1242{
1243 struct vfsmount *this_parent = parent;
1244 struct list_head *next;
1245 int found = 0;
1246
1247repeat:
1248 next = this_parent->mnt_mounts.next;
1249resume:
1250 while (next != &this_parent->mnt_mounts) {
1251 struct list_head *tmp = next;
1252 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);
1253
1254 next = tmp->next;
1255 if (!(mnt->mnt_flags & MNT_SHRINKABLE))
1256 continue;
1257
1258
1259
1260 if (!list_empty(&mnt->mnt_mounts)) {
1261 this_parent = mnt;
1262 goto repeat;
1263 }
1264
1265 if (!propagate_mount_busy(mnt, 1)) {
1266 mntget(mnt);
1267 list_move_tail(&mnt->mnt_expire, graveyard);
1268 found++;
1269 }
1270 }
1271
1272
1273
1274 if (this_parent != parent) {
1275 next = this_parent->mnt_child.next;
1276 this_parent = this_parent->mnt_parent;
1277 goto resume;
1278 }
1279 return found;
1280}
1281
1282
1283
1284
1285
1286void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts)
1287{
1288 LIST_HEAD(graveyard);
1289 int found;
1290
1291 spin_lock(&vfsmount_lock);
1292
1293
1294 while ((found = select_submounts(mountpoint, &graveyard)) != 0)
1295 expire_mount_list(&graveyard, mounts);
1296
1297 spin_unlock(&vfsmount_lock);
1298}
1299
1300EXPORT_SYMBOL_GPL(shrink_submounts);
1301
1302
1303
1304
1305
1306
1307
1308static long exact_copy_from_user(void *to, const void __user * from,
1309 unsigned long n)
1310{
1311 char *t = to;
1312 const char __user *f = from;
1313 char c;
1314
1315 if (!access_ok(VERIFY_READ, from, n))
1316 return n;
1317
1318 while (n) {
1319 if (__get_user(c, f)) {
1320 memset(t, 0, n);
1321 break;
1322 }
1323 *t++ = c;
1324 f++;
1325 n--;
1326 }
1327 return n;
1328}
1329
1330int copy_mount_options(const void __user * data, unsigned long *where)
1331{
1332 int i;
1333 unsigned long page;
1334 unsigned long size;
1335
1336 *where = 0;
1337 if (!data)
1338 return 0;
1339
1340 if (!(page = __get_free_page(GFP_KERNEL)))
1341 return -ENOMEM;
1342
1343
1344
1345
1346
1347
1348 size = TASK_SIZE - (unsigned long)data;
1349 if (size > PAGE_SIZE)
1350 size = PAGE_SIZE;
1351
1352 i = size - exact_copy_from_user((void *)page, data, size);
1353 if (!i) {
1354 free_page(page);
1355 return -EFAULT;
1356 }
1357 if (i != PAGE_SIZE)
1358 memset((char *)page + i, 0, PAGE_SIZE - i);
1359 *where = page;
1360 return 0;
1361}
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377long do_mount(char *dev_name, char *dir_name, char *type_page,
1378 unsigned long flags, void *data_page)
1379{
1380 struct nameidata nd;
1381 int retval = 0;
1382 int mnt_flags = 0;
1383
1384
1385 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
1386 flags &= ~MS_MGC_MSK;
1387
1388
1389
1390 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1391 return -EINVAL;
1392 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1393 return -EINVAL;
1394
1395 if (data_page)
1396 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1397
1398
1399 if (flags & MS_NOSUID)
1400 mnt_flags |= MNT_NOSUID;
1401 if (flags & MS_NODEV)
1402 mnt_flags |= MNT_NODEV;
1403 if (flags & MS_NOEXEC)
1404 mnt_flags |= MNT_NOEXEC;
1405 if (flags & MS_NOATIME)
1406 mnt_flags |= MNT_NOATIME;
1407 if (flags & MS_NODIRATIME)
1408 mnt_flags |= MNT_NODIRATIME;
1409 if (flags & MS_RELATIME)
1410 mnt_flags |= MNT_RELATIME;
1411
1412 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1413 MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
1414
1415
1416 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
1417 if (retval)
1418 return retval;
1419
1420 retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
1421 if (retval)
1422 goto dput_out;
1423
1424 if (flags & MS_REMOUNT)
1425 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1426 data_page);
1427 else if (flags & MS_BIND)
1428 retval = do_loopback(&nd, dev_name, flags & MS_REC);
1429 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1430 retval = do_change_type(&nd, flags);
1431 else if (flags & MS_MOVE)
1432 retval = do_move_mount(&nd, dev_name);
1433 else
1434 retval = do_new_mount(&nd, type_page, flags, mnt_flags,
1435 dev_name, data_page);
1436dput_out:
1437 path_release(&nd);
1438 return retval;
1439}
1440
1441
1442
1443
1444
1445struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
1446 struct fs_struct *fs)
1447{
1448 struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
1449 struct mnt_namespace *new_ns;
1450 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
1451 struct vfsmount *p, *q;
1452
1453 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1454 if (!new_ns)
1455 return NULL;
1456
1457 atomic_set(&new_ns->count, 1);
1458 INIT_LIST_HEAD(&new_ns->list);
1459 init_waitqueue_head(&new_ns->poll);
1460 new_ns->event = 0;
1461
1462 down_write(&namespace_sem);
1463
1464 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
1465 CL_COPY_ALL | CL_EXPIRE);
1466 if (!new_ns->root) {
1467 up_write(&namespace_sem);
1468 kfree(new_ns);
1469 return NULL;
1470 }
1471 spin_lock(&vfsmount_lock);
1472 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
1473 spin_unlock(&vfsmount_lock);
1474
1475
1476
1477
1478
1479
1480 p = mnt_ns->root;
1481 q = new_ns->root;
1482 while (p) {
1483 q->mnt_ns = new_ns;
1484 if (fs) {
1485 if (p == fs->rootmnt) {
1486 rootmnt = p;
1487 fs->rootmnt = mntget(q);
1488 }
1489 if (p == fs->pwdmnt) {
1490 pwdmnt = p;
1491 fs->pwdmnt = mntget(q);
1492 }
1493 if (p == fs->altrootmnt) {
1494 altrootmnt = p;
1495 fs->altrootmnt = mntget(q);
1496 }
1497 }
1498 p = next_mnt(p, mnt_ns->root);
1499 q = next_mnt(q, new_ns->root);
1500 }
1501 up_write(&namespace_sem);
1502
1503 if (rootmnt)
1504 mntput(rootmnt);
1505 if (pwdmnt)
1506 mntput(pwdmnt);
1507 if (altrootmnt)
1508 mntput(altrootmnt);
1509
1510 return new_ns;
1511}
1512
1513int copy_mnt_ns(int flags, struct task_struct *tsk)
1514{
1515 struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
1516 struct mnt_namespace *new_ns;
1517 int err = 0;
1518
1519 if (!ns)
1520 return 0;
1521
1522 get_mnt_ns(ns);
1523
1524 if (!(flags & CLONE_NEWNS))
1525 return 0;
1526
1527 if (!capable(CAP_SYS_ADMIN)) {
1528 err = -EPERM;
1529 goto out;
1530 }
1531
1532 new_ns = dup_mnt_ns(tsk, tsk->fs);
1533 if (!new_ns) {
1534 err = -ENOMEM;
1535 goto out;
1536 }
1537
1538 tsk->nsproxy->mnt_ns = new_ns;
1539
1540out:
1541 put_mnt_ns(ns);
1542 return err;
1543}
1544
1545asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1546 char __user * type, unsigned long flags,
1547 void __user * data)
1548{
1549 int retval;
1550 unsigned long data_page;
1551 unsigned long type_page;
1552 unsigned long dev_page;
1553 char *dir_page;
1554
1555 retval = copy_mount_options(type, &type_page);
1556 if (retval < 0)
1557 return retval;
1558
1559 dir_page = getname(dir_name);
1560 retval = PTR_ERR(dir_page);
1561 if (IS_ERR(dir_page))
1562 goto out1;
1563
1564 retval = copy_mount_options(dev_name, &dev_page);
1565 if (retval < 0)
1566 goto out2;
1567
1568 retval = copy_mount_options(data, &data_page);
1569 if (retval < 0)
1570 goto out3;
1571
1572 lock_kernel();
1573 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
1574 flags, (void *)data_page);
1575 unlock_kernel();
1576 free_page(data_page);
1577
1578out3:
1579 free_page(dev_page);
1580out2:
1581 putname(dir_page);
1582out1:
1583 free_page(type_page);
1584 return retval;
1585}
1586
1587
1588
1589
1590
1591void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
1592 struct dentry *dentry)
1593{
1594 struct dentry *old_root;
1595 struct vfsmount *old_rootmnt;
1596 write_lock(&fs->lock);
1597 old_root = fs->root;
1598 old_rootmnt = fs->rootmnt;
1599 fs->rootmnt = mntget(mnt);
1600 fs->root = dget(dentry);
1601 write_unlock(&fs->lock);
1602 if (old_root) {
1603 dput(old_root);
1604 mntput(old_rootmnt);
1605 }
1606}
1607
1608
1609
1610
1611
1612void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
1613 struct dentry *dentry)
1614{
1615 struct dentry *old_pwd;
1616 struct vfsmount *old_pwdmnt;
1617
1618 write_lock(&fs->lock);
1619 old_pwd = fs->pwd;
1620 old_pwdmnt = fs->pwdmnt;
1621 fs->pwdmnt = mntget(mnt);
1622 fs->pwd = dget(dentry);
1623 write_unlock(&fs->lock);
1624
1625 if (old_pwd) {
1626 dput(old_pwd);
1627 mntput(old_pwdmnt);
1628 }
1629}
1630
1631static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1632{
1633 struct task_struct *g, *p;
1634 struct fs_struct *fs;
1635
1636 read_lock(&tasklist_lock);
1637 do_each_thread(g, p) {
1638 task_lock(p);
1639 fs = p->fs;
1640 if (fs) {
1641 atomic_inc(&fs->count);
1642 task_unlock(p);
1643 if (fs->root == old_nd->dentry
1644 && fs->rootmnt == old_nd->mnt)
1645 set_fs_root(fs, new_nd->mnt, new_nd->dentry);
1646 if (fs->pwd == old_nd->dentry
1647 && fs->pwdmnt == old_nd->mnt)
1648 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
1649 put_fs_struct(fs);
1650 } else
1651 task_unlock(p);
1652 } while_each_thread(g, p);
1653 read_unlock(&tasklist_lock);
1654}
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681asmlinkage long sys_pivot_root(const char __user * new_root,
1682 const char __user * put_old)
1683{
1684 struct vfsmount *tmp;
1685 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
1686 int error;
1687
1688 if (!capable(CAP_SYS_ADMIN))
1689 return -EPERM;
1690
1691 lock_kernel();
1692
1693 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
1694 &new_nd);
1695 if (error)
1696 goto out0;
1697 error = -EINVAL;
1698 if (!check_mnt(new_nd.mnt))
1699 goto out1;
1700
1701 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
1702 if (error)
1703 goto out1;
1704
1705 error = security_sb_pivotroot(&old_nd, &new_nd);
1706 if (error) {
1707 path_release(&old_nd);
1708 goto out1;
1709 }
1710
1711 read_lock(¤t->fs->lock);
1712 user_nd.mnt = mntget(current->fs->rootmnt);
1713 user_nd.dentry = dget(current->fs->root);
1714 read_unlock(¤t->fs->lock);
1715 down_write(&namespace_sem);
1716 mutex_lock(&old_nd.dentry->d_inode->i_mutex);
1717 error = -EINVAL;
1718 if (IS_MNT_SHARED(old_nd.mnt) ||
1719 IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
1720 IS_MNT_SHARED(user_nd.mnt->mnt_parent))
1721 goto out2;
1722 if (!check_mnt(user_nd.mnt))
1723 goto out2;
1724 error = -ENOENT;
1725 if (IS_DEADDIR(new_nd.dentry->d_inode))
1726 goto out2;
1727 if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
1728 goto out2;
1729 if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
1730 goto out2;
1731 error = -EBUSY;
1732 if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
1733 goto out2;
1734 error = -EINVAL;
1735 if (user_nd.mnt->mnt_root != user_nd.dentry)
1736 goto out2;
1737 if (user_nd.mnt->mnt_parent == user_nd.mnt)
1738 goto out2;
1739 if (new_nd.mnt->mnt_root != new_nd.dentry)
1740 goto out2;
1741 if (new_nd.mnt->mnt_parent == new_nd.mnt)
1742 goto out2;
1743 tmp = old_nd.mnt;
1744 spin_lock(&vfsmount_lock);
1745 if (tmp != new_nd.mnt) {
1746 for (;;) {
1747 if (tmp->mnt_parent == tmp)
1748 goto out3;
1749 if (tmp->mnt_parent == new_nd.mnt)
1750 break;
1751 tmp = tmp->mnt_parent;
1752 }
1753 if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
1754 goto out3;
1755 } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
1756 goto out3;
1757 detach_mnt(new_nd.mnt, &parent_nd);
1758 detach_mnt(user_nd.mnt, &root_parent);
1759 attach_mnt(user_nd.mnt, &old_nd);
1760 attach_mnt(new_nd.mnt, &root_parent);
1761 touch_mnt_namespace(current->nsproxy->mnt_ns);
1762 spin_unlock(&vfsmount_lock);
1763 chroot_fs_refs(&user_nd, &new_nd);
1764 security_sb_post_pivotroot(&user_nd, &new_nd);
1765 error = 0;
1766 path_release(&root_parent);
1767 path_release(&parent_nd);
1768out2:
1769 mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
1770 up_write(&namespace_sem);
1771 path_release(&user_nd);
1772 path_release(&old_nd);
1773out1:
1774 path_release(&new_nd);
1775out0:
1776 unlock_kernel();
1777 return error;
1778out3:
1779 spin_unlock(&vfsmount_lock);
1780 goto out2;
1781}
1782
1783static void __init init_mount_tree(void)
1784{
1785 struct vfsmount *mnt;
1786 struct mnt_namespace *ns;
1787
1788 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
1789 if (IS_ERR(mnt))
1790 panic("Can't create rootfs");
1791 ns = kmalloc(sizeof(*ns), GFP_KERNEL);
1792 if (!ns)
1793 panic("Can't allocate initial namespace");
1794 atomic_set(&ns->count, 1);
1795 INIT_LIST_HEAD(&ns->list);
1796 init_waitqueue_head(&ns->poll);
1797 ns->event = 0;
1798 list_add(&mnt->mnt_list, &ns->list);
1799 ns->root = mnt;
1800 mnt->mnt_ns = ns;
1801
1802 init_task.nsproxy->mnt_ns = ns;
1803 get_mnt_ns(ns);
1804
1805 set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
1806 set_fs_root(current->fs, ns->root, ns->root->mnt_root);
1807}
1808
1809void __init mnt_init(unsigned long mempages)
1810{
1811 struct list_head *d;
1812 unsigned int nr_hash;
1813 int i;
1814 int err;
1815
1816 init_rwsem(&namespace_sem);
1817
1818 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
1819 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
1820
1821 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
1822
1823 if (!mount_hashtable)
1824 panic("Failed to allocate mount hash table\n");
1825
1826
1827
1828
1829
1830
1831 nr_hash = PAGE_SIZE / sizeof(struct list_head);
1832 hash_bits = 0;
1833 do {
1834 hash_bits++;
1835 } while ((nr_hash >> hash_bits) != 0);
1836 hash_bits--;
1837
1838
1839
1840
1841
1842 nr_hash = 1UL << hash_bits;
1843 hash_mask = nr_hash - 1;
1844
1845 printk("Mount-cache hash table entries: %d\n", nr_hash);
1846
1847
1848 d = mount_hashtable;
1849 i = nr_hash;
1850 do {
1851 INIT_LIST_HEAD(d);
1852 d++;
1853 i--;
1854 } while (i);
1855 err = sysfs_init();
1856 if (err)
1857 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
1858 __FUNCTION__, err);
1859 err = subsystem_register(&fs_subsys);
1860 if (err)
1861 printk(KERN_WARNING "%s: subsystem_register error: %d\n",
1862 __FUNCTION__, err);
1863 init_rootfs();
1864 init_mount_tree();
1865}
1866
1867void __put_mnt_ns(struct mnt_namespace *ns)
1868{
1869 struct vfsmount *root = ns->root;
1870 LIST_HEAD(umount_list);
1871 ns->root = NULL;
1872 spin_unlock(&vfsmount_lock);
1873 down_write(&namespace_sem);
1874 spin_lock(&vfsmount_lock);
1875 umount_tree(root, 0, &umount_list);
1876 spin_unlock(&vfsmount_lock);
1877 up_write(&namespace_sem);
1878 release_mounts(&umount_list);
1879 kfree(ns);
1880}
1881