1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/idr.h>
19#include <linux/acct.h>
20#include <linux/ramfs.h>
21#include <linux/fs_struct.h>
22#include <linux/fsnotify.h>
23#include <linux/uaccess.h>
24#include <linux/proc_fs.h>
25#include "pnode.h"
26#include "internal.h"
27
/*
 * HASH_SHIFT is log2 of the number of struct list_head entries that fit in
 * one page; it is also used by hash() to fold the high bits of the key.
 */
28#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
/* HASH_SIZE is the bucket count; hash() masks its result with HASH_SIZE - 1. */
29#define HASH_SIZE (1UL << HASH_SHIFT)
30
/*
 * Global event counter; bumped by touch_mnt_namespace() and do_umount() and
 * copied into a namespace's ->event before its pollers are woken.
 */
31static int event;
/* Allocator for mount ids (mnt->mnt_id); accessed under mnt_id_lock. */
32static DEFINE_IDA(mnt_id_ida);
/* Allocator for peer group ids (mnt->mnt_group_id). */
33static DEFINE_IDA(mnt_group_ida);
/* Serialises mnt_id_ida and mnt_id_start in mnt_alloc_id()/mnt_free_id(). */
34static DEFINE_SPINLOCK(mnt_id_lock);
/* Lower bound handed to ida_get_new_above() for the next mount id. */
35static int mnt_id_start = 0;
/* Lower bound handed to ida_get_new_above() for the next group id. */
36static int mnt_group_start = 1;
37
/* Array of HASH_SIZE list heads indexed by hash(parent mount, mountpoint). */
38static struct list_head *mount_hashtable __read_mostly;
/* Slab cache that struct mount objects are allocated from. */
39static struct kmem_cache *mnt_cache __read_mostly;
/* Taken (read or write) around namespace/mount-tree walks and updates. */
40static struct rw_semaphore namespace_sem;
41
42
/* Exported kobject pointer; it is not assigned anywhere in this part of the file. */
43struct kobject *fs_kobj;
44EXPORT_SYMBOL_GPL(fs_kobj);
45
46
47
48
49
50
51
52
53
/*
 * vfsmount_lock guards the mount hash, mount lists, mnt_flags updates and
 * the reference-count slow paths below.  Lookups take the read side
 * (br_read_lock); every modification takes the write side (br_write_lock).
 */
54DEFINE_BRLOCK(vfsmount_lock);
55
56static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
57{
58 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
59 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
60 tmp = tmp + (tmp >> HASH_SHIFT);
61 return tmp & (HASH_SIZE - 1);
62}
63
/*
 * Evaluates to -65536.  Not referenced in the visible part of this file.
 * NOTE(review): the expansion is not wrapped in parentheses.
 */
64#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
65
66
67
68
69
70static int mnt_alloc_id(struct mount *mnt)
71{
72 int res;
73
74retry:
75 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
76 spin_lock(&mnt_id_lock);
77 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
78 if (!res)
79 mnt_id_start = mnt->mnt_id + 1;
80 spin_unlock(&mnt_id_lock);
81 if (res == -EAGAIN)
82 goto retry;
83
84 return res;
85}
86
87static void mnt_free_id(struct mount *mnt)
88{
89 int id = mnt->mnt_id;
90 spin_lock(&mnt_id_lock);
91 ida_remove(&mnt_id_ida, id);
92 if (mnt_id_start > id)
93 mnt_id_start = id;
94 spin_unlock(&mnt_id_lock);
95}
96
97
98
99
100
101
102static int mnt_alloc_group_id(struct mount *mnt)
103{
104 int res;
105
106 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
107 return -ENOMEM;
108
109 res = ida_get_new_above(&mnt_group_ida,
110 mnt_group_start,
111 &mnt->mnt_group_id);
112 if (!res)
113 mnt_group_start = mnt->mnt_group_id + 1;
114
115 return res;
116}
117
118
119
120
121void mnt_release_group_id(struct mount *mnt)
122{
123 int id = mnt->mnt_group_id;
124 ida_remove(&mnt_group_ida, id);
125 if (mnt_group_start > id)
126 mnt_group_start = id;
127 mnt->mnt_group_id = 0;
128}
129
130
131
132
/*
 * Add n (which may be negative) to mnt's reference count.
 * On SMP the count is per-CPU and only this CPU's slot is adjusted; on UP
 * the single counter is adjusted with preemption disabled.
 */
133static inline void mnt_add_count(struct mount *mnt, int n)
134{
135#ifdef CONFIG_SMP
136 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
137#else
138 preempt_disable();
139 mnt->mnt_count += n;
140 preempt_enable();
141#endif
142}
143
144
145
146
147unsigned int mnt_get_count(struct mount *mnt)
148{
149#ifdef CONFIG_SMP
150 unsigned int count = 0;
151 int cpu;
152
153 for_each_possible_cpu(cpu) {
154 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
155 }
156
157 return count;
158#else
159 return mnt->mnt_count;
160#endif
161}
162
163static struct mount *alloc_vfsmnt(const char *name)
164{
165 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
166 if (mnt) {
167 int err;
168
169 err = mnt_alloc_id(mnt);
170 if (err)
171 goto out_free_cache;
172
173 if (name) {
174 mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
175 if (!mnt->mnt_devname)
176 goto out_free_id;
177 }
178
179#ifdef CONFIG_SMP
180 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
181 if (!mnt->mnt_pcp)
182 goto out_free_devname;
183
184 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
185#else
186 mnt->mnt_count = 1;
187 mnt->mnt_writers = 0;
188#endif
189
190 INIT_LIST_HEAD(&mnt->mnt_hash);
191 INIT_LIST_HEAD(&mnt->mnt_child);
192 INIT_LIST_HEAD(&mnt->mnt_mounts);
193 INIT_LIST_HEAD(&mnt->mnt_list);
194 INIT_LIST_HEAD(&mnt->mnt_expire);
195 INIT_LIST_HEAD(&mnt->mnt_share);
196 INIT_LIST_HEAD(&mnt->mnt_slave_list);
197 INIT_LIST_HEAD(&mnt->mnt_slave);
198#ifdef CONFIG_FSNOTIFY
199 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
200#endif
201 }
202 return mnt;
203
204#ifdef CONFIG_SMP
205out_free_devname:
206 kfree(mnt->mnt_devname);
207#endif
208out_free_id:
209 mnt_free_id(mnt);
210out_free_cache:
211 kmem_cache_free(mnt_cache, mnt);
212 return NULL;
213}
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234int __mnt_is_readonly(struct vfsmount *mnt)
235{
236 if (mnt->mnt_flags & MNT_READONLY)
237 return 1;
238 if (mnt->mnt_sb->s_flags & MS_RDONLY)
239 return 1;
240 return 0;
241}
242EXPORT_SYMBOL_GPL(__mnt_is_readonly);
243
/*
 * Increment mnt's writer count (this CPU's slot on SMP).  The visible
 * callers invoke this with preemption disabled.
 */
244static inline void mnt_inc_writers(struct mount *mnt)
245{
246#ifdef CONFIG_SMP
247 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
248#else
249 mnt->mnt_writers++;
250#endif
251}
252
/*
 * Decrement mnt's writer count (this CPU's slot on SMP).  The visible
 * callers invoke this with preemption disabled.
 */
253static inline void mnt_dec_writers(struct mount *mnt)
254{
255#ifdef CONFIG_SMP
256 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
257#else
258 mnt->mnt_writers--;
259#endif
260}
261
262static unsigned int mnt_get_writers(struct mount *mnt)
263{
264#ifdef CONFIG_SMP
265 unsigned int count = 0;
266 int cpu;
267
268 for_each_possible_cpu(cpu) {
269 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
270 }
271
272 return count;
273#else
274 return mnt->mnt_writers;
275#endif
276}
277
/*
 * Like __mnt_is_readonly(), but also treats a superblock that has
 * s_readonly_remount set as read-only.  Returns 1 for read-only, else 0.
 */
278static int mnt_is_readonly(struct vfsmount *mnt)
279{
280 if (mnt->mnt_sb->s_readonly_remount)
281 return 1;
282
/*
 * Read barrier between the s_readonly_remount load above and the flag loads
 * in __mnt_is_readonly(); sb_prepare_remount_readonly() issues smp_wmb()
 * after setting s_readonly_remount.
 */
283 smp_rmb();
284 return __mnt_is_readonly(mnt);
285}
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
/*
 * Register the caller as a writer on mount @m.
 *
 * Returns 0 on success (the writer count stays incremented and must later
 * be dropped with __mnt_drop_write()), or -EROFS if the mount turns out to
 * be read-only, in which case the increment is undone.
 */
303int __mnt_want_write(struct vfsmount *m)
304{
305 struct mount *mnt = real_mount(m);
306 int ret = 0;
307
/* The per-CPU writer count must be touched with preemption off. */
308 preempt_disable();
309 mnt_inc_writers(mnt);
310
311
312
313
314
/*
 * Full barrier between the increment above and the MNT_WRITE_HOLD load
 * below; pairs with the smp_mb() issued after MNT_WRITE_HOLD is set in
 * mnt_make_readonly() and sb_prepare_remount_readonly().
 */
315 smp_mb();
/* Spin until whoever set MNT_WRITE_HOLD has cleared it again. */
316 while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
317 cpu_relax();
318
319
320
321
322
/* Read barrier so the read-only test below follows the MNT_WRITE_HOLD load. */
323 smp_rmb();
324 if (mnt_is_readonly(m)) {
325 mnt_dec_writers(mnt);
326 ret = -EROFS;
327 }
328 preempt_enable();
329
330 return ret;
331}
332
333
334
335
336
337
338
339
340
341
342int mnt_want_write(struct vfsmount *m)
343{
344 int ret;
345
346 sb_start_write(m->mnt_sb);
347 ret = __mnt_want_write(m);
348 if (ret)
349 sb_end_write(m->mnt_sb);
350 return ret;
351}
352EXPORT_SYMBOL_GPL(mnt_want_write);
353
354
355
356
357
358
359
360
361
362
363
364
365
366int mnt_clone_write(struct vfsmount *mnt)
367{
368
369 if (__mnt_is_readonly(mnt))
370 return -EROFS;
371 preempt_disable();
372 mnt_inc_writers(real_mount(mnt));
373 preempt_enable();
374 return 0;
375}
376EXPORT_SYMBOL_GPL(mnt_clone_write);
377
378
379
380
381
382
383
384
385int __mnt_want_write_file(struct file *file)
386{
387 struct inode *inode = file->f_dentry->d_inode;
388
389 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
390 return __mnt_want_write(file->f_path.mnt);
391 else
392 return mnt_clone_write(file->f_path.mnt);
393}
394
395
396
397
398
399
400
401
402int mnt_want_write_file(struct file *file)
403{
404 int ret;
405
406 sb_start_write(file->f_path.mnt->mnt_sb);
407 ret = __mnt_want_write_file(file);
408 if (ret)
409 sb_end_write(file->f_path.mnt->mnt_sb);
410 return ret;
411}
412EXPORT_SYMBOL_GPL(mnt_want_write_file);
413
414
415
416
417
418
419
420
421
/*
 * Drop one writer reference on @mnt (the counterpart of
 * __mnt_want_write()).  Preemption is disabled around the per-CPU update.
 */
422void __mnt_drop_write(struct vfsmount *mnt)
423{
424 preempt_disable();
425 mnt_dec_writers(real_mount(mnt));
426 preempt_enable();
427}
428
429
430
431
432
433
434
435
436
/*
 * Give up write access obtained with mnt_want_write(): drop the mount's
 * writer reference, then leave the superblock's write section.
 */
437void mnt_drop_write(struct vfsmount *mnt)
438{
439 __mnt_drop_write(mnt);
440 sb_end_write(mnt->mnt_sb);
441}
442EXPORT_SYMBOL_GPL(mnt_drop_write);
443
/* Drop the writer reference on the mount backing @file. */
444void __mnt_drop_write_file(struct file *file)
445{
446 __mnt_drop_write(file->f_path.mnt);
447}
448
/*
 * File-based wrapper around mnt_drop_write(): releases the writer reference
 * and the superblock write section for the mount backing @file.
 */
449void mnt_drop_write_file(struct file *file)
450{
451 mnt_drop_write(file->f_path.mnt);
452}
453EXPORT_SYMBOL(mnt_drop_write_file);
454
/*
 * Try to switch @mnt to read-only.
 *
 * Returns 0 after setting MNT_READONLY, or -EBUSY (flags unchanged apart
 * from the temporary MNT_WRITE_HOLD) if the mount still has writers.
 */
455static int mnt_make_readonly(struct mount *mnt)
456{
457 int ret = 0;
458
459 br_write_lock(&vfsmount_lock);
/* Make new writers in __mnt_want_write() spin while we count. */
460 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
461
462
463
464
/*
 * Full barrier between setting MNT_WRITE_HOLD and reading the writer
 * counts; pairs with the smp_mb() in __mnt_want_write().
 */
465 smp_mb();
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483 if (mnt_get_writers(mnt) > 0)
484 ret = -EBUSY;
485 else
486 mnt->mnt.mnt_flags |= MNT_READONLY;
487
488
489
490
/*
 * Write barrier so MNT_READONLY is visible before MNT_WRITE_HOLD is
 * cleared; spinning writers then observe the read-only flag.
 */
491 smp_wmb();
492 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
493 br_write_unlock(&vfsmount_lock);
494 return ret;
495}
496
/* Clear MNT_READONLY on @mnt under the write side of vfsmount_lock. */
497static void __mnt_unmake_readonly(struct mount *mnt)
498{
499 br_write_lock(&vfsmount_lock);
500 mnt->mnt.mnt_flags &= ~MNT_READONLY;
501 br_write_unlock(&vfsmount_lock);
502}
503
/*
 * Check that superblock @sb can be remounted read-only and, if so, mark the
 * remount as in progress by setting sb->s_readonly_remount.
 *
 * Returns 0 on success, or -EBUSY if sb->s_remove_count is non-zero or any
 * writable mount of the superblock still has writers.
 */
504int sb_prepare_remount_readonly(struct super_block *sb)
505{
506 struct mount *mnt;
507 int err = 0;
508
509
/* Unlocked early-out; the same counter is re-checked below under the lock. */
510 if (atomic_long_read(&sb->s_remove_count))
511 return -EBUSY;
512
513 br_write_lock(&vfsmount_lock);
/*
 * Put every mount that is not already read-only on hold and check it for
 * writers.  On the first busy mount we stop; mounts visited so far keep
 * MNT_WRITE_HOLD until the cleanup loop below.
 */
514 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
515 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
516 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
/* Pairs with the smp_mb() in __mnt_want_write(). */
517 smp_mb();
518 if (mnt_get_writers(mnt) > 0) {
519 err = -EBUSY;
520 break;
521 }
522 }
523 }
524 if (!err && atomic_long_read(&sb->s_remove_count))
525 err = -EBUSY;
526
527 if (!err) {
528 sb->s_readonly_remount = 1;
/* Publish s_readonly_remount before the holds are released below. */
529 smp_wmb();
530 }
/* Release every hold taken above, whether or not we succeeded. */
531 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
532 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
533 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
534 }
535 br_write_unlock(&vfsmount_lock);
536
537 return err;
538}
539
/*
 * Release everything alloc_vfsmnt() set up: the device name copy, the mount
 * id, the per-CPU counters (SMP only) and finally the struct mount itself.
 */
540static void free_vfsmnt(struct mount *mnt)
541{
542 kfree(mnt->mnt_devname);
543 mnt_free_id(mnt);
544#ifdef CONFIG_SMP
545 free_percpu(mnt->mnt_pcp);
546#endif
547 kmem_cache_free(mnt_cache, mnt);
548}
549
550
551
552
553
554
555struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
556 int dir)
557{
558 struct list_head *head = mount_hashtable + hash(mnt, dentry);
559 struct list_head *tmp = head;
560 struct mount *p, *found = NULL;
561
562 for (;;) {
563 tmp = dir ? tmp->next : tmp->prev;
564 p = NULL;
565 if (tmp == head)
566 break;
567 p = list_entry(tmp, struct mount, mnt_hash);
568 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
569 found = p;
570 break;
571 }
572 }
573 return found;
574}
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592struct vfsmount *lookup_mnt(struct path *path)
593{
594 struct mount *child_mnt;
595
596 br_read_lock(&vfsmount_lock);
597 child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
598 if (child_mnt) {
599 mnt_add_count(child_mnt, 1);
600 br_read_unlock(&vfsmount_lock);
601 return &child_mnt->mnt;
602 } else {
603 br_read_unlock(&vfsmount_lock);
604 return NULL;
605 }
606}
607
608static inline int check_mnt(struct mount *mnt)
609{
610 return mnt->mnt_ns == current->nsproxy->mnt_ns;
611}
612
613
614
615
616static void touch_mnt_namespace(struct mnt_namespace *ns)
617{
618 if (ns) {
619 ns->event = ++event;
620 wake_up_interruptible(&ns->poll);
621 }
622}
623
624
625
626
627static void __touch_mnt_namespace(struct mnt_namespace *ns)
628{
629 if (ns && ns->event != event) {
630 ns->event = event;
631 wake_up_interruptible(&ns->poll);
632 }
633}
634
635
636
637
638
639static void dentry_reset_mounted(struct dentry *dentry)
640{
641 unsigned u;
642
643 for (u = 0; u < HASH_SIZE; u++) {
644 struct mount *p;
645
646 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
647 if (p->mnt_mountpoint == dentry)
648 return;
649 }
650 }
651 spin_lock(&dentry->d_lock);
652 dentry->d_flags &= ~DCACHE_MOUNTED;
653 spin_unlock(&dentry->d_lock);
654}
655
656
657
658
659static void detach_mnt(struct mount *mnt, struct path *old_path)
660{
661 old_path->dentry = mnt->mnt_mountpoint;
662 old_path->mnt = &mnt->mnt_parent->mnt;
663 mnt->mnt_parent = mnt;
664 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
665 list_del_init(&mnt->mnt_child);
666 list_del_init(&mnt->mnt_hash);
667 dentry_reset_mounted(old_path->dentry);
668}
669
670
671
672
/*
 * Record that @child_mnt is mounted on @dentry of @mnt.
 *
 * Takes a reference on the parent mount and on the mountpoint dentry,
 * stores both in @child_mnt and flags the dentry DCACHE_MOUNTED under its
 * d_lock.  The child is not added to any list or hash here.
 */
673void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
674 struct mount *child_mnt)
675{
676 mnt_add_count(mnt, 1);
677 child_mnt->mnt_mountpoint = dget(dentry);
678 child_mnt->mnt_parent = mnt;
679 spin_lock(&dentry->d_lock);
680 dentry->d_flags |= DCACHE_MOUNTED;
681 spin_unlock(&dentry->d_lock);
682}
683
684
685
686
687static void attach_mnt(struct mount *mnt, struct path *path)
688{
689 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
690 list_add_tail(&mnt->mnt_hash, mount_hashtable +
691 hash(path->mnt, path->dentry));
692 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
693}
694
695
696
697
698static void commit_tree(struct mount *mnt)
699{
700 struct mount *parent = mnt->mnt_parent;
701 struct mount *m;
702 LIST_HEAD(head);
703 struct mnt_namespace *n = parent->mnt_ns;
704
705 BUG_ON(parent == mnt);
706
707 list_add_tail(&head, &mnt->mnt_list);
708 list_for_each_entry(m, &head, mnt_list)
709 m->mnt_ns = n;
710
711 list_splice(&head, n->list.prev);
712
713 list_add_tail(&mnt->mnt_hash, mount_hashtable +
714 hash(&parent->mnt, mnt->mnt_mountpoint));
715 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
716 touch_mnt_namespace(n);
717}
718
719static struct mount *next_mnt(struct mount *p, struct mount *root)
720{
721 struct list_head *next = p->mnt_mounts.next;
722 if (next == &p->mnt_mounts) {
723 while (1) {
724 if (p == root)
725 return NULL;
726 next = p->mnt_child.next;
727 if (next != &p->mnt_parent->mnt_mounts)
728 break;
729 p = p->mnt_parent;
730 }
731 }
732 return list_entry(next, struct mount, mnt_child);
733}
734
735static struct mount *skip_mnt_tree(struct mount *p)
736{
737 struct list_head *prev = p->mnt_mounts.prev;
738 while (prev != &p->mnt_mounts) {
739 p = list_entry(prev, struct mount, mnt_child);
740 prev = p->mnt_mounts.prev;
741 }
742 return p;
743}
744
745struct vfsmount *
746vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
747{
748 struct mount *mnt;
749 struct dentry *root;
750
751 if (!type)
752 return ERR_PTR(-ENODEV);
753
754 mnt = alloc_vfsmnt(name);
755 if (!mnt)
756 return ERR_PTR(-ENOMEM);
757
758 if (flags & MS_KERNMOUNT)
759 mnt->mnt.mnt_flags = MNT_INTERNAL;
760
761 root = mount_fs(type, flags, name, data);
762 if (IS_ERR(root)) {
763 free_vfsmnt(mnt);
764 return ERR_CAST(root);
765 }
766
767 mnt->mnt.mnt_root = root;
768 mnt->mnt.mnt_sb = root->d_sb;
769 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
770 mnt->mnt_parent = mnt;
771 br_write_lock(&vfsmount_lock);
772 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
773 br_write_unlock(&vfsmount_lock);
774 return &mnt->mnt;
775}
776EXPORT_SYMBOL_GPL(vfs_kern_mount);
777
778static struct mount *clone_mnt(struct mount *old, struct dentry *root,
779 int flag)
780{
781 struct super_block *sb = old->mnt.mnt_sb;
782 struct mount *mnt;
783 int err;
784
785 mnt = alloc_vfsmnt(old->mnt_devname);
786 if (!mnt)
787 return ERR_PTR(-ENOMEM);
788
789 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
790 mnt->mnt_group_id = 0;
791 else
792 mnt->mnt_group_id = old->mnt_group_id;
793
794 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
795 err = mnt_alloc_group_id(mnt);
796 if (err)
797 goto out_free;
798 }
799
800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
801 atomic_inc(&sb->s_active);
802 mnt->mnt.mnt_sb = sb;
803 mnt->mnt.mnt_root = dget(root);
804 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
805 mnt->mnt_parent = mnt;
806 br_write_lock(&vfsmount_lock);
807 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
808 br_write_unlock(&vfsmount_lock);
809
810 if ((flag & CL_SLAVE) ||
811 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
812 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
813 mnt->mnt_master = old;
814 CLEAR_MNT_SHARED(mnt);
815 } else if (!(flag & CL_PRIVATE)) {
816 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
817 list_add(&mnt->mnt_share, &old->mnt_share);
818 if (IS_MNT_SLAVE(old))
819 list_add(&mnt->mnt_slave, &old->mnt_slave);
820 mnt->mnt_master = old->mnt_master;
821 }
822 if (flag & CL_MAKE_SHARED)
823 set_mnt_shared(mnt);
824
825
826
827 if (flag & CL_EXPIRE) {
828 if (!list_empty(&old->mnt_expire))
829 list_add(&mnt->mnt_expire, &old->mnt_expire);
830 }
831
832 return mnt;
833
834 out_free:
835 free_vfsmnt(mnt);
836 return ERR_PTR(err);
837}
838
839static inline void mntfree(struct mount *mnt)
840{
841 struct vfsmount *m = &mnt->mnt;
842 struct super_block *sb = m->mnt_sb;
843
844
845
846
847
848
849
850
851
852
853
854 WARN_ON(mnt_get_writers(mnt));
855 fsnotify_vfsmount_delete(m);
856 dput(m->mnt_root);
857 free_vfsmnt(mnt);
858 deactivate_super(sb);
859}
860
/*
 * Drop one reference on @mnt without touching its expiry mark, and free the
 * mount if that was the last reference.
 */
861static void mntput_no_expire(struct mount *mnt)
862{
863put_again:
864#ifdef CONFIG_SMP
/*
 * Fast path: a mount that still belongs to a namespace (mnt_ns set) is
 * only decremented under the read lock; the count is not summed.
 */
865 br_read_lock(&vfsmount_lock);
866 if (likely(mnt->mnt_ns)) {
867
868 mnt_add_count(mnt, -1);
869 br_read_unlock(&vfsmount_lock);
870 return;
871 }
872 br_read_unlock(&vfsmount_lock);
873
/* Slow path: decrement and sum the per-CPU counts under the write lock. */
874 br_write_lock(&vfsmount_lock);
875 mnt_add_count(mnt, -1);
876 if (mnt_get_count(mnt)) {
877 br_write_unlock(&vfsmount_lock);
878 return;
879 }
880#else
881 mnt_add_count(mnt, -1);
882 if (likely(mnt_get_count(mnt)))
883 return;
884 br_write_lock(&vfsmount_lock);
885#endif
/*
 * Count reached zero with the write lock held.  If the mount is pinned
 * (see mnt_pin()), turn the pins plus one extra into real references,
 * clear the pin count, let acct_auto_close_mnt() run without the lock
 * held, and retry the put.
 */
886 if (unlikely(mnt->mnt_pinned)) {
887 mnt_add_count(mnt, mnt->mnt_pinned + 1);
888 mnt->mnt_pinned = 0;
889 br_write_unlock(&vfsmount_lock);
890 acct_auto_close_mnt(&mnt->mnt);
891 goto put_again;
892 }
893
/* Really the last reference: unlink from sb->s_mounts and free. */
894 list_del(&mnt->mnt_instance);
895 br_write_unlock(&vfsmount_lock);
896 mntfree(mnt);
897}
898
899void mntput(struct vfsmount *mnt)
900{
901 if (mnt) {
902 struct mount *m = real_mount(mnt);
903
904 if (unlikely(m->mnt_expiry_mark))
905 m->mnt_expiry_mark = 0;
906 mntput_no_expire(m);
907 }
908}
909EXPORT_SYMBOL(mntput);
910
911struct vfsmount *mntget(struct vfsmount *mnt)
912{
913 if (mnt)
914 mnt_add_count(real_mount(mnt), 1);
915 return mnt;
916}
917EXPORT_SYMBOL(mntget);
918
919void mnt_pin(struct vfsmount *mnt)
920{
921 br_write_lock(&vfsmount_lock);
922 real_mount(mnt)->mnt_pinned++;
923 br_write_unlock(&vfsmount_lock);
924}
925EXPORT_SYMBOL(mnt_pin);
926
927void mnt_unpin(struct vfsmount *m)
928{
929 struct mount *mnt = real_mount(m);
930 br_write_lock(&vfsmount_lock);
931 if (mnt->mnt_pinned) {
932 mnt_add_count(mnt, 1);
933 mnt->mnt_pinned--;
934 }
935 br_write_unlock(&vfsmount_lock);
936}
937EXPORT_SYMBOL(mnt_unpin);
938
/*
 * Emit string @s to seq_file @m via seq_escape(), escaping space, tab,
 * newline and backslash.
 */
939static inline void mangle(struct seq_file *m, const char *s)
940{
941 seq_escape(m, s, " \t\n\\");
942}
943
944
945
946
947
948
949
950int generic_show_options(struct seq_file *m, struct dentry *root)
951{
952 const char *options;
953
954 rcu_read_lock();
955 options = rcu_dereference(root->d_sb->s_options);
956
957 if (options != NULL && options[0]) {
958 seq_putc(m, ',');
959 mangle(m, options);
960 }
961 rcu_read_unlock();
962
963 return 0;
964}
965EXPORT_SYMBOL(generic_show_options);
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980void save_mount_options(struct super_block *sb, char *options)
981{
982 BUG_ON(sb->s_options);
983 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
984}
985EXPORT_SYMBOL(save_mount_options);
986
987void replace_mount_options(struct super_block *sb, char *options)
988{
989 char *old = sb->s_options;
990 rcu_assign_pointer(sb->s_options, options);
991 if (old) {
992 synchronize_rcu();
993 kfree(old);
994 }
995}
996EXPORT_SYMBOL(replace_mount_options);
997
998#ifdef CONFIG_PROC_FS
999
1000static void *m_start(struct seq_file *m, loff_t *pos)
1001{
1002 struct proc_mounts *p = proc_mounts(m);
1003
1004 down_read(&namespace_sem);
1005 return seq_list_start(&p->ns->list, *pos);
1006}
1007
1008static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1009{
1010 struct proc_mounts *p = proc_mounts(m);
1011
1012 return seq_list_next(v, &p->ns->list, pos);
1013}
1014
/* seq_file ->stop: release the read lock on namespace_sem taken by m_start(). */
1015static void m_stop(struct seq_file *m, void *v)
1016{
1017 up_read(&namespace_sem);
1018}
1019
1020static int m_show(struct seq_file *m, void *v)
1021{
1022 struct proc_mounts *p = proc_mounts(m);
1023 struct mount *r = list_entry(v, struct mount, mnt_list);
1024 return p->show(m, &r->mnt);
1025}
1026
/*
 * seq_file iterator over the mounts of a namespace.  namespace_sem is held
 * for reading from ->start until ->stop.
 */
1027const struct seq_operations mounts_op = {
1028 .start = m_start,
1029 .next = m_next,
1030 .stop = m_stop,
1031 .show = m_show,
1032};
1033#endif
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043int may_umount_tree(struct vfsmount *m)
1044{
1045 struct mount *mnt = real_mount(m);
1046 int actual_refs = 0;
1047 int minimum_refs = 0;
1048 struct mount *p;
1049 BUG_ON(!m);
1050
1051
1052 br_write_lock(&vfsmount_lock);
1053 for (p = mnt; p; p = next_mnt(p, mnt)) {
1054 actual_refs += mnt_get_count(p);
1055 minimum_refs += 2;
1056 }
1057 br_write_unlock(&vfsmount_lock);
1058
1059 if (actual_refs > minimum_refs)
1060 return 0;
1061
1062 return 1;
1063}
1064
1065EXPORT_SYMBOL(may_umount_tree);
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080int may_umount(struct vfsmount *mnt)
1081{
1082 int ret = 1;
1083 down_read(&namespace_sem);
1084 br_write_lock(&vfsmount_lock);
1085 if (propagate_mount_busy(real_mount(mnt), 2))
1086 ret = 0;
1087 br_write_unlock(&vfsmount_lock);
1088 up_read(&namespace_sem);
1089 return ret;
1090}
1091
1092EXPORT_SYMBOL(may_umount);
1093
1094void release_mounts(struct list_head *head)
1095{
1096 struct mount *mnt;
1097 while (!list_empty(head)) {
1098 mnt = list_first_entry(head, struct mount, mnt_hash);
1099 list_del_init(&mnt->mnt_hash);
1100 if (mnt_has_parent(mnt)) {
1101 struct dentry *dentry;
1102 struct mount *m;
1103
1104 br_write_lock(&vfsmount_lock);
1105 dentry = mnt->mnt_mountpoint;
1106 m = mnt->mnt_parent;
1107 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1108 mnt->mnt_parent = mnt;
1109 m->mnt_ghosts--;
1110 br_write_unlock(&vfsmount_lock);
1111 dput(dentry);
1112 mntput(&m->mnt);
1113 }
1114 mntput(&mnt->mnt);
1115 }
1116}
1117
1118
1119
1120
1121
1122void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1123{
1124 LIST_HEAD(tmp_list);
1125 struct mount *p;
1126
1127 for (p = mnt; p; p = next_mnt(p, mnt))
1128 list_move(&p->mnt_hash, &tmp_list);
1129
1130 if (propagate)
1131 propagate_umount(&tmp_list);
1132
1133 list_for_each_entry(p, &tmp_list, mnt_hash) {
1134 list_del_init(&p->mnt_expire);
1135 list_del_init(&p->mnt_list);
1136 __touch_mnt_namespace(p->mnt_ns);
1137 p->mnt_ns = NULL;
1138 list_del_init(&p->mnt_child);
1139 if (mnt_has_parent(p)) {
1140 p->mnt_parent->mnt_ghosts++;
1141 dentry_reset_mounted(p->mnt_mountpoint);
1142 }
1143 change_mnt_propagation(p, MS_PRIVATE);
1144 }
1145 list_splice(&tmp_list, kill);
1146}
1147
1148static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
1149
1150static int do_umount(struct mount *mnt, int flags)
1151{
1152 struct super_block *sb = mnt->mnt.mnt_sb;
1153 int retval;
1154 LIST_HEAD(umount_list);
1155
1156 retval = security_sb_umount(&mnt->mnt, flags);
1157 if (retval)
1158 return retval;
1159
1160
1161
1162
1163
1164
1165
1166 if (flags & MNT_EXPIRE) {
1167 if (&mnt->mnt == current->fs->root.mnt ||
1168 flags & (MNT_FORCE | MNT_DETACH))
1169 return -EINVAL;
1170
1171
1172
1173
1174
1175 br_write_lock(&vfsmount_lock);
1176 if (mnt_get_count(mnt) != 2) {
1177 br_write_unlock(&vfsmount_lock);
1178 return -EBUSY;
1179 }
1180 br_write_unlock(&vfsmount_lock);
1181
1182 if (!xchg(&mnt->mnt_expiry_mark, 1))
1183 return -EAGAIN;
1184 }
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1197 sb->s_op->umount_begin(sb);
1198 }
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1210
1211
1212
1213
1214 down_write(&sb->s_umount);
1215 if (!(sb->s_flags & MS_RDONLY))
1216 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1217 up_write(&sb->s_umount);
1218 return retval;
1219 }
1220
1221 down_write(&namespace_sem);
1222 br_write_lock(&vfsmount_lock);
1223 event++;
1224
1225 if (!(flags & MNT_DETACH))
1226 shrink_submounts(mnt, &umount_list);
1227
1228 retval = -EBUSY;
1229 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1230 if (!list_empty(&mnt->mnt_list))
1231 umount_tree(mnt, 1, &umount_list);
1232 retval = 0;
1233 }
1234 br_write_unlock(&vfsmount_lock);
1235 up_write(&namespace_sem);
1236 release_mounts(&umount_list);
1237 return retval;
1238}
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1249{
1250 struct path path;
1251 struct mount *mnt;
1252 int retval;
1253 int lookup_flags = 0;
1254
1255 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1256 return -EINVAL;
1257
1258 if (!(flags & UMOUNT_NOFOLLOW))
1259 lookup_flags |= LOOKUP_FOLLOW;
1260
1261 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1262 if (retval)
1263 goto out;
1264 mnt = real_mount(path.mnt);
1265 retval = -EINVAL;
1266 if (path.dentry != path.mnt->mnt_root)
1267 goto dput_and_out;
1268 if (!check_mnt(mnt))
1269 goto dput_and_out;
1270
1271 retval = -EPERM;
1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1273 goto dput_and_out;
1274
1275 retval = do_umount(mnt, flags);
1276dput_and_out:
1277
1278 dput(path.dentry);
1279 mntput_no_expire(mnt);
1280out:
1281 return retval;
1282}
1283
1284#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1285
1286
1287
1288
/* Legacy single-argument umount: equivalent to umount(name, 0). */
1289SYSCALL_DEFINE1(oldumount, char __user *, name)
1290{
1291 return sys_umount(name, 0);
1292}
1293
1294#endif
1295
1296static int mount_is_safe(struct path *path)
1297{
1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1299 return 0;
1300 return -EPERM;
1301#ifdef notyet
1302 if (S_ISLNK(path->dentry->d_inode->i_mode))
1303 return -EPERM;
1304 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1305 if (current_uid() != path->dentry->d_inode->i_uid)
1306 return -EPERM;
1307 }
1308 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1309 return -EPERM;
1310 return 0;
1311#endif
1312}
1313
1314static bool mnt_ns_loop(struct path *path)
1315{
1316
1317
1318
1319 struct inode *inode = path->dentry->d_inode;
1320 struct proc_inode *ei;
1321 struct mnt_namespace *mnt_ns;
1322
1323 if (!proc_ns_inode(inode))
1324 return false;
1325
1326 ei = PROC_I(inode);
1327 if (ei->ns_ops != &mntns_operations)
1328 return false;
1329
1330 mnt_ns = ei->ns;
1331 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1332}
1333
/*
 * Clone the mount tree rooted at @mnt, restricted to what lies under
 * @dentry, using clone_mnt() with @flag for every mount.
 *
 * Unbindable mounts (and everything below them) are skipped unless
 * CL_COPY_ALL is set; an unbindable @mnt itself yields ERR_PTR(-EINVAL).
 * Returns the root of the copy, or an ERR_PTR(); on a mid-way failure the
 * partial copy is torn down before returning.
 */
1334struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1335 int flag)
1336{
/* p walks the source tree, q the matching position in the copy. */
1337 struct mount *res, *p, *q, *r;
1338 struct path path;
1339
1340 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1341 return ERR_PTR(-EINVAL);
1342
1343 res = q = clone_mnt(mnt, dentry, flag);
1344 if (IS_ERR(q))
1345 return q;
1346
1347 q->mnt_mountpoint = mnt->mnt_mountpoint;
1348
1349 p = mnt;
1350 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1351 struct mount *s;
/* Only children mounted at or below @dentry belong to the copy. */
1352 if (!is_subdir(r->mnt_mountpoint, dentry))
1353 continue;
1354
1355 for (s = r; s; s = next_mnt(s, r)) {
1356 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
/* Jump to the last mount of s's subtree so next_mnt() leaves it. */
1357 s = skip_mnt_tree(s);
1358 continue;
1359 }
/* Climb both cursors until p is the source parent of s. */
1360 while (p != s->mnt_parent) {
1361 p = p->mnt_parent;
1362 q = q->mnt_parent;
1363 }
1364 p = s;
/* q is still the copied parent here; the clone is attached beneath it. */
1365 path.mnt = &q->mnt;
1366 path.dentry = p->mnt_mountpoint;
1367 q = clone_mnt(p, p->mnt.mnt_root, flag);
1368 if (IS_ERR(q))
1369 goto out;
1370 br_write_lock(&vfsmount_lock);
1371 list_add_tail(&q->mnt_list, &res->mnt_list);
1372 attach_mnt(q, &path);
1373 br_write_unlock(&vfsmount_lock);
1374 }
1375 }
1376 return res;
1377out:
/* Tear down what was copied so far; q holds the error pointer to return. */
1378 if (res) {
1379 LIST_HEAD(umount_list);
1380 br_write_lock(&vfsmount_lock);
1381 umount_tree(res, 0, &umount_list);
1382 br_write_unlock(&vfsmount_lock);
1383 release_mounts(&umount_list);
1384 }
1385 return q;
1386}
1387
1388
1389
1390struct vfsmount *collect_mounts(struct path *path)
1391{
1392 struct mount *tree;
1393 down_write(&namespace_sem);
1394 tree = copy_tree(real_mount(path->mnt), path->dentry,
1395 CL_COPY_ALL | CL_PRIVATE);
1396 up_write(&namespace_sem);
1397 if (IS_ERR(tree))
1398 return NULL;
1399 return &tree->mnt;
1400}
1401
1402void drop_collected_mounts(struct vfsmount *mnt)
1403{
1404 LIST_HEAD(umount_list);
1405 down_write(&namespace_sem);
1406 br_write_lock(&vfsmount_lock);
1407 umount_tree(real_mount(mnt), 0, &umount_list);
1408 br_write_unlock(&vfsmount_lock);
1409 up_write(&namespace_sem);
1410 release_mounts(&umount_list);
1411}
1412
1413int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1414 struct vfsmount *root)
1415{
1416 struct mount *mnt;
1417 int res = f(root, arg);
1418 if (res)
1419 return res;
1420 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1421 res = f(&mnt->mnt, arg);
1422 if (res)
1423 return res;
1424 }
1425 return 0;
1426}
1427
1428static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1429{
1430 struct mount *p;
1431
1432 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1433 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1434 mnt_release_group_id(p);
1435 }
1436}
1437
1438static int invent_group_ids(struct mount *mnt, bool recurse)
1439{
1440 struct mount *p;
1441
1442 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1443 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1444 int err = mnt_alloc_group_id(p);
1445 if (err) {
1446 cleanup_group_ids(mnt, p);
1447 return err;
1448 }
1449 }
1450 }
1451
1452 return 0;
1453}
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518static int attach_recursive_mnt(struct mount *source_mnt,
1519 struct path *path, struct path *parent_path)
1520{
1521 LIST_HEAD(tree_list);
1522 struct mount *dest_mnt = real_mount(path->mnt);
1523 struct dentry *dest_dentry = path->dentry;
1524 struct mount *child, *p;
1525 int err;
1526
1527 if (IS_MNT_SHARED(dest_mnt)) {
1528 err = invent_group_ids(source_mnt, true);
1529 if (err)
1530 goto out;
1531 }
1532 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
1533 if (err)
1534 goto out_cleanup_ids;
1535
1536 br_write_lock(&vfsmount_lock);
1537
1538 if (IS_MNT_SHARED(dest_mnt)) {
1539 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1540 set_mnt_shared(p);
1541 }
1542 if (parent_path) {
1543 detach_mnt(source_mnt, parent_path);
1544 attach_mnt(source_mnt, path);
1545 touch_mnt_namespace(source_mnt->mnt_ns);
1546 } else {
1547 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1548 commit_tree(source_mnt);
1549 }
1550
1551 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
1552 list_del_init(&child->mnt_hash);
1553 commit_tree(child);
1554 }
1555 br_write_unlock(&vfsmount_lock);
1556
1557 return 0;
1558
1559 out_cleanup_ids:
1560 if (IS_MNT_SHARED(dest_mnt))
1561 cleanup_group_ids(source_mnt, NULL);
1562 out:
1563 return err;
1564}
1565
1566static int lock_mount(struct path *path)
1567{
1568 struct vfsmount *mnt;
1569retry:
1570 mutex_lock(&path->dentry->d_inode->i_mutex);
1571 if (unlikely(cant_mount(path->dentry))) {
1572 mutex_unlock(&path->dentry->d_inode->i_mutex);
1573 return -ENOENT;
1574 }
1575 down_write(&namespace_sem);
1576 mnt = lookup_mnt(path);
1577 if (likely(!mnt))
1578 return 0;
1579 up_write(&namespace_sem);
1580 mutex_unlock(&path->dentry->d_inode->i_mutex);
1581 path_put(path);
1582 path->mnt = mnt;
1583 path->dentry = dget(mnt->mnt_root);
1584 goto retry;
1585}
1586
/*
 * Release the locks taken by a successful lock_mount(): namespace_sem
 * first, then the i_mutex of the path's inode.
 */
1587static void unlock_mount(struct path *path)
1588{
1589 up_write(&namespace_sem);
1590 mutex_unlock(&path->dentry->d_inode->i_mutex);
1591}
1592
1593static int graft_tree(struct mount *mnt, struct path *path)
1594{
1595 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1596 return -EINVAL;
1597
1598 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1599 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1600 return -ENOTDIR;
1601
1602 if (d_unlinked(path->dentry))
1603 return -ENOENT;
1604
1605 return attach_recursive_mnt(mnt, path, NULL);
1606}
1607
1608
1609
1610
1611
1612static int flags_to_propagation_type(int flags)
1613{
1614 int type = flags & ~(MS_REC | MS_SILENT);
1615
1616
1617 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1618 return 0;
1619
1620 if (!is_power_of_2(type))
1621 return 0;
1622 return type;
1623}
1624
1625
1626
1627
1628static int do_change_type(struct path *path, int flag)
1629{
1630 struct mount *m;
1631 struct mount *mnt = real_mount(path->mnt);
1632 int recurse = flag & MS_REC;
1633 int type;
1634 int err = 0;
1635
1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1637 return -EPERM;
1638
1639 if (path->dentry != path->mnt->mnt_root)
1640 return -EINVAL;
1641
1642 type = flags_to_propagation_type(flag);
1643 if (!type)
1644 return -EINVAL;
1645
1646 down_write(&namespace_sem);
1647 if (type == MS_SHARED) {
1648 err = invent_group_ids(mnt, recurse);
1649 if (err)
1650 goto out_unlock;
1651 }
1652
1653 br_write_lock(&vfsmount_lock);
1654 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1655 change_mnt_propagation(m, type);
1656 br_write_unlock(&vfsmount_lock);
1657
1658 out_unlock:
1659 up_write(&namespace_sem);
1660 return err;
1661}
1662
1663
1664
1665
1666static int do_loopback(struct path *path, const char *old_name,
1667 int recurse)
1668{
1669 LIST_HEAD(umount_list);
1670 struct path old_path;
1671 struct mount *mnt = NULL, *old;
1672 int err = mount_is_safe(path);
1673 if (err)
1674 return err;
1675 if (!old_name || !*old_name)
1676 return -EINVAL;
1677 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
1678 if (err)
1679 return err;
1680
1681 err = -EINVAL;
1682 if (mnt_ns_loop(&old_path))
1683 goto out;
1684
1685 err = lock_mount(path);
1686 if (err)
1687 goto out;
1688
1689 old = real_mount(old_path.mnt);
1690
1691 err = -EINVAL;
1692 if (IS_MNT_UNBINDABLE(old))
1693 goto out2;
1694
1695 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
1696 goto out2;
1697
1698 if (recurse)
1699 mnt = copy_tree(old, old_path.dentry, 0);
1700 else
1701 mnt = clone_mnt(old, old_path.dentry, 0);
1702
1703 if (IS_ERR(mnt)) {
1704 err = PTR_ERR(mnt);
1705 goto out;
1706 }
1707
1708 err = graft_tree(mnt, path);
1709 if (err) {
1710 br_write_lock(&vfsmount_lock);
1711 umount_tree(mnt, 0, &umount_list);
1712 br_write_unlock(&vfsmount_lock);
1713 }
1714out2:
1715 unlock_mount(path);
1716 release_mounts(&umount_list);
1717out:
1718 path_put(&old_path);
1719 return err;
1720}
1721
1722static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1723{
1724 int error = 0;
1725 int readonly_request = 0;
1726
1727 if (ms_flags & MS_RDONLY)
1728 readonly_request = 1;
1729 if (readonly_request == __mnt_is_readonly(mnt))
1730 return 0;
1731
1732 if (readonly_request)
1733 error = mnt_make_readonly(real_mount(mnt));
1734 else
1735 __mnt_unmake_readonly(real_mount(mnt));
1736 return error;
1737}
1738
1739
1740
1741
1742
1743
1744static int do_remount(struct path *path, int flags, int mnt_flags,
1745 void *data)
1746{
1747 int err;
1748 struct super_block *sb = path->mnt->mnt_sb;
1749 struct mount *mnt = real_mount(path->mnt);
1750
1751 if (!capable(CAP_SYS_ADMIN))
1752 return -EPERM;
1753
1754 if (!check_mnt(mnt))
1755 return -EINVAL;
1756
1757 if (path->dentry != path->mnt->mnt_root)
1758 return -EINVAL;
1759
1760 err = security_sb_remount(sb, data);
1761 if (err)
1762 return err;
1763
1764 down_write(&sb->s_umount);
1765 if (flags & MS_BIND)
1766 err = change_mount_flags(path->mnt, flags);
1767 else
1768 err = do_remount_sb(sb, flags, data, 0);
1769 if (!err) {
1770 br_write_lock(&vfsmount_lock);
1771 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
1772 mnt->mnt.mnt_flags = mnt_flags;
1773 br_write_unlock(&vfsmount_lock);
1774 }
1775 up_write(&sb->s_umount);
1776 if (!err) {
1777 br_write_lock(&vfsmount_lock);
1778 touch_mnt_namespace(mnt->mnt_ns);
1779 br_write_unlock(&vfsmount_lock);
1780 }
1781 return err;
1782}
1783
/*
 * Return 1 if @mnt or any mount reached from it via next_mnt() is
 * unbindable, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
1793
1794static int do_move_mount(struct path *path, const char *old_name)
1795{
1796 struct path old_path, parent_path;
1797 struct mount *p;
1798 struct mount *old;
1799 int err = 0;
1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1801 return -EPERM;
1802 if (!old_name || !*old_name)
1803 return -EINVAL;
1804 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1805 if (err)
1806 return err;
1807
1808 err = lock_mount(path);
1809 if (err < 0)
1810 goto out;
1811
1812 old = real_mount(old_path.mnt);
1813 p = real_mount(path->mnt);
1814
1815 err = -EINVAL;
1816 if (!check_mnt(p) || !check_mnt(old))
1817 goto out1;
1818
1819 if (d_unlinked(path->dentry))
1820 goto out1;
1821
1822 err = -EINVAL;
1823 if (old_path.dentry != old_path.mnt->mnt_root)
1824 goto out1;
1825
1826 if (!mnt_has_parent(old))
1827 goto out1;
1828
1829 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1830 S_ISDIR(old_path.dentry->d_inode->i_mode))
1831 goto out1;
1832
1833
1834
1835 if (IS_MNT_SHARED(old->mnt_parent))
1836 goto out1;
1837
1838
1839
1840
1841 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
1842 goto out1;
1843 err = -ELOOP;
1844 for (; mnt_has_parent(p); p = p->mnt_parent)
1845 if (p == old)
1846 goto out1;
1847
1848 err = attach_recursive_mnt(old, path, &parent_path);
1849 if (err)
1850 goto out1;
1851
1852
1853
1854 list_del_init(&old->mnt_expire);
1855out1:
1856 unlock_mount(path);
1857out:
1858 if (!err)
1859 path_put(&parent_path);
1860 path_put(&old_path);
1861 return err;
1862}
1863
1864static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1865{
1866 int err;
1867 const char *subtype = strchr(fstype, '.');
1868 if (subtype) {
1869 subtype++;
1870 err = -EINVAL;
1871 if (!subtype[0])
1872 goto err;
1873 } else
1874 subtype = "";
1875
1876 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
1877 err = -ENOMEM;
1878 if (!mnt->mnt_sb->s_subtype)
1879 goto err;
1880 return mnt;
1881
1882 err:
1883 mntput(mnt);
1884 return ERR_PTR(err);
1885}
1886
1887
1888
1889
1890static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1891{
1892 int err;
1893
1894 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1895
1896 err = lock_mount(path);
1897 if (err)
1898 return err;
1899
1900 err = -EINVAL;
1901 if (unlikely(!check_mnt(real_mount(path->mnt)))) {
1902
1903 if (!(mnt_flags & MNT_SHRINKABLE))
1904 goto unlock;
1905
1906 if (!real_mount(path->mnt)->mnt_ns)
1907 goto unlock;
1908 }
1909
1910
1911 err = -EBUSY;
1912 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
1913 path->mnt->mnt_root == path->dentry)
1914 goto unlock;
1915
1916 err = -EINVAL;
1917 if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
1918 goto unlock;
1919
1920 newmnt->mnt.mnt_flags = mnt_flags;
1921 err = graft_tree(newmnt, path);
1922
1923unlock:
1924 unlock_mount(path);
1925 return err;
1926}
1927
1928
1929
1930
1931
1932static int do_new_mount(struct path *path, const char *fstype, int flags,
1933 int mnt_flags, const char *name, void *data)
1934{
1935 struct file_system_type *type;
1936 struct user_namespace *user_ns;
1937 struct vfsmount *mnt;
1938 int err;
1939
1940 if (!fstype)
1941 return -EINVAL;
1942
1943
1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1946 return -EPERM;
1947
1948 type = get_fs_type(fstype);
1949 if (!type)
1950 return -ENODEV;
1951
1952 if (user_ns != &init_user_ns) {
1953 if (!(type->fs_flags & FS_USERNS_MOUNT)) {
1954 put_filesystem(type);
1955 return -EPERM;
1956 }
1957
1958
1959
1960 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
1961 flags |= MS_NODEV;
1962 mnt_flags |= MNT_NODEV;
1963 }
1964 }
1965
1966 mnt = vfs_kern_mount(type, flags, name, data);
1967 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1968 !mnt->mnt_sb->s_subtype)
1969 mnt = fs_set_subtype(mnt, fstype);
1970
1971 put_filesystem(type);
1972 if (IS_ERR(mnt))
1973 return PTR_ERR(mnt);
1974
1975 err = do_add_mount(real_mount(mnt), path, mnt_flags);
1976 if (err)
1977 mntput(mnt);
1978 return err;
1979}
1980
1981int finish_automount(struct vfsmount *m, struct path *path)
1982{
1983 struct mount *mnt = real_mount(m);
1984 int err;
1985
1986
1987
1988 BUG_ON(mnt_get_count(mnt) < 2);
1989
1990 if (m->mnt_sb == path->mnt->mnt_sb &&
1991 m->mnt_root == path->dentry) {
1992 err = -ELOOP;
1993 goto fail;
1994 }
1995
1996 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
1997 if (!err)
1998 return 0;
1999fail:
2000
2001 if (!list_empty(&mnt->mnt_expire)) {
2002 down_write(&namespace_sem);
2003 br_write_lock(&vfsmount_lock);
2004 list_del_init(&mnt->mnt_expire);
2005 br_write_unlock(&vfsmount_lock);
2006 up_write(&namespace_sem);
2007 }
2008 mntput(m);
2009 mntput(m);
2010 return err;
2011}
2012
2013
2014
2015
2016
2017
2018void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2019{
2020 down_write(&namespace_sem);
2021 br_write_lock(&vfsmount_lock);
2022
2023 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2024
2025 br_write_unlock(&vfsmount_lock);
2026 up_write(&namespace_sem);
2027}
2028EXPORT_SYMBOL(mnt_set_expiry);
2029
2030
2031
2032
2033
2034
2035void mark_mounts_for_expiry(struct list_head *mounts)
2036{
2037 struct mount *mnt, *next;
2038 LIST_HEAD(graveyard);
2039 LIST_HEAD(umounts);
2040
2041 if (list_empty(mounts))
2042 return;
2043
2044 down_write(&namespace_sem);
2045 br_write_lock(&vfsmount_lock);
2046
2047
2048
2049
2050
2051
2052
2053 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
2054 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
2055 propagate_mount_busy(mnt, 1))
2056 continue;
2057 list_move(&mnt->mnt_expire, &graveyard);
2058 }
2059 while (!list_empty(&graveyard)) {
2060 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2061 touch_mnt_namespace(mnt->mnt_ns);
2062 umount_tree(mnt, 1, &umounts);
2063 }
2064 br_write_unlock(&vfsmount_lock);
2065 up_write(&namespace_sem);
2066
2067 release_mounts(&umounts);
2068}
2069
2070EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2071
2072
2073
2074
2075
2076
2077
2078static int select_submounts(struct mount *parent, struct list_head *graveyard)
2079{
2080 struct mount *this_parent = parent;
2081 struct list_head *next;
2082 int found = 0;
2083
2084repeat:
2085 next = this_parent->mnt_mounts.next;
2086resume:
2087 while (next != &this_parent->mnt_mounts) {
2088 struct list_head *tmp = next;
2089 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
2090
2091 next = tmp->next;
2092 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
2093 continue;
2094
2095
2096
2097 if (!list_empty(&mnt->mnt_mounts)) {
2098 this_parent = mnt;
2099 goto repeat;
2100 }
2101
2102 if (!propagate_mount_busy(mnt, 1)) {
2103 list_move_tail(&mnt->mnt_expire, graveyard);
2104 found++;
2105 }
2106 }
2107
2108
2109
2110 if (this_parent != parent) {
2111 next = this_parent->mnt_child.next;
2112 this_parent = this_parent->mnt_parent;
2113 goto resume;
2114 }
2115 return found;
2116}
2117
2118
2119
2120
2121
2122
2123
2124static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
2125{
2126 LIST_HEAD(graveyard);
2127 struct mount *m;
2128
2129
2130 while (select_submounts(mnt, &graveyard)) {
2131 while (!list_empty(&graveyard)) {
2132 m = list_first_entry(&graveyard, struct mount,
2133 mnt_expire);
2134 touch_mnt_namespace(m->mnt_ns);
2135 umount_tree(m, 1, umounts);
2136 }
2137 }
2138}
2139
2140
2141
2142
2143
2144
2145
2146static long exact_copy_from_user(void *to, const void __user * from,
2147 unsigned long n)
2148{
2149 char *t = to;
2150 const char __user *f = from;
2151 char c;
2152
2153 if (!access_ok(VERIFY_READ, from, n))
2154 return n;
2155
2156 while (n) {
2157 if (__get_user(c, f)) {
2158 memset(t, 0, n);
2159 break;
2160 }
2161 *t++ = c;
2162 f++;
2163 n--;
2164 }
2165 return n;
2166}
2167
2168int copy_mount_options(const void __user * data, unsigned long *where)
2169{
2170 int i;
2171 unsigned long page;
2172 unsigned long size;
2173
2174 *where = 0;
2175 if (!data)
2176 return 0;
2177
2178 if (!(page = __get_free_page(GFP_KERNEL)))
2179 return -ENOMEM;
2180
2181
2182
2183
2184
2185
2186 size = TASK_SIZE - (unsigned long)data;
2187 if (size > PAGE_SIZE)
2188 size = PAGE_SIZE;
2189
2190 i = size - exact_copy_from_user((void *)page, data, size);
2191 if (!i) {
2192 free_page(page);
2193 return -EFAULT;
2194 }
2195 if (i != PAGE_SIZE)
2196 memset((char *)page + i, 0, PAGE_SIZE - i);
2197 *where = page;
2198 return 0;
2199}
2200
2201int copy_mount_string(const void __user *data, char **where)
2202{
2203 char *tmp;
2204
2205 if (!data) {
2206 *where = NULL;
2207 return 0;
2208 }
2209
2210 tmp = strndup_user(data, PAGE_SIZE);
2211 if (IS_ERR(tmp))
2212 return PTR_ERR(tmp);
2213
2214 *where = tmp;
2215 return 0;
2216}
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232long do_mount(const char *dev_name, const char *dir_name,
2233 const char *type_page, unsigned long flags, void *data_page)
2234{
2235 struct path path;
2236 int retval = 0;
2237 int mnt_flags = 0;
2238
2239
2240 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
2241 flags &= ~MS_MGC_MSK;
2242
2243
2244
2245 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
2246 return -EINVAL;
2247
2248 if (data_page)
2249 ((char *)data_page)[PAGE_SIZE - 1] = 0;
2250
2251
2252 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
2253 if (retval)
2254 return retval;
2255
2256 retval = security_sb_mount(dev_name, &path,
2257 type_page, flags, data_page);
2258 if (retval)
2259 goto dput_out;
2260
2261
2262 if (!(flags & MS_NOATIME))
2263 mnt_flags |= MNT_RELATIME;
2264
2265
2266 if (flags & MS_NOSUID)
2267 mnt_flags |= MNT_NOSUID;
2268 if (flags & MS_NODEV)
2269 mnt_flags |= MNT_NODEV;
2270 if (flags & MS_NOEXEC)
2271 mnt_flags |= MNT_NOEXEC;
2272 if (flags & MS_NOATIME)
2273 mnt_flags |= MNT_NOATIME;
2274 if (flags & MS_NODIRATIME)
2275 mnt_flags |= MNT_NODIRATIME;
2276 if (flags & MS_STRICTATIME)
2277 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2278 if (flags & MS_RDONLY)
2279 mnt_flags |= MNT_READONLY;
2280
2281 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
2282 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
2283 MS_STRICTATIME);
2284
2285 if (flags & MS_REMOUNT)
2286 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
2287 data_page);
2288 else if (flags & MS_BIND)
2289 retval = do_loopback(&path, dev_name, flags & MS_REC);
2290 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2291 retval = do_change_type(&path, flags);
2292 else if (flags & MS_MOVE)
2293 retval = do_move_mount(&path, dev_name);
2294 else
2295 retval = do_new_mount(&path, type_page, flags, mnt_flags,
2296 dev_name, data_page);
2297dput_out:
2298 path_put(&path);
2299 return retval;
2300}
2301
2302static void free_mnt_ns(struct mnt_namespace *ns)
2303{
2304 proc_free_inum(ns->proc_inum);
2305 put_user_ns(ns->user_ns);
2306 kfree(ns);
2307}
2308
2309
2310
2311
2312
2313
2314
2315
2316static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2317
2318static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2319{
2320 struct mnt_namespace *new_ns;
2321 int ret;
2322
2323 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2324 if (!new_ns)
2325 return ERR_PTR(-ENOMEM);
2326 ret = proc_alloc_inum(&new_ns->proc_inum);
2327 if (ret) {
2328 kfree(new_ns);
2329 return ERR_PTR(ret);
2330 }
2331 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2332 atomic_set(&new_ns->count, 1);
2333 new_ns->root = NULL;
2334 INIT_LIST_HEAD(&new_ns->list);
2335 init_waitqueue_head(&new_ns->poll);
2336 new_ns->event = 0;
2337 new_ns->user_ns = get_user_ns(user_ns);
2338 return new_ns;
2339}
2340
2341
2342
2343
2344
2345static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2346 struct user_namespace *user_ns, struct fs_struct *fs)
2347{
2348 struct mnt_namespace *new_ns;
2349 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2350 struct mount *p, *q;
2351 struct mount *old = mnt_ns->root;
2352 struct mount *new;
2353 int copy_flags;
2354
2355 new_ns = alloc_mnt_ns(user_ns);
2356 if (IS_ERR(new_ns))
2357 return new_ns;
2358
2359 down_write(&namespace_sem);
2360
2361 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2362 if (user_ns != mnt_ns->user_ns)
2363 copy_flags |= CL_SHARED_TO_SLAVE;
2364 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2365 if (IS_ERR(new)) {
2366 up_write(&namespace_sem);
2367 free_mnt_ns(new_ns);
2368 return ERR_CAST(new);
2369 }
2370 new_ns->root = new;
2371 br_write_lock(&vfsmount_lock);
2372 list_add_tail(&new_ns->list, &new->mnt_list);
2373 br_write_unlock(&vfsmount_lock);
2374
2375
2376
2377
2378
2379
2380 p = old;
2381 q = new;
2382 while (p) {
2383 q->mnt_ns = new_ns;
2384 if (fs) {
2385 if (&p->mnt == fs->root.mnt) {
2386 fs->root.mnt = mntget(&q->mnt);
2387 rootmnt = &p->mnt;
2388 }
2389 if (&p->mnt == fs->pwd.mnt) {
2390 fs->pwd.mnt = mntget(&q->mnt);
2391 pwdmnt = &p->mnt;
2392 }
2393 }
2394 p = next_mnt(p, old);
2395 q = next_mnt(q, new);
2396 }
2397 up_write(&namespace_sem);
2398
2399 if (rootmnt)
2400 mntput(rootmnt);
2401 if (pwdmnt)
2402 mntput(pwdmnt);
2403
2404 return new_ns;
2405}
2406
2407struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2408 struct user_namespace *user_ns, struct fs_struct *new_fs)
2409{
2410 struct mnt_namespace *new_ns;
2411
2412 BUG_ON(!ns);
2413 get_mnt_ns(ns);
2414
2415 if (!(flags & CLONE_NEWNS))
2416 return ns;
2417
2418 new_ns = dup_mnt_ns(ns, user_ns, new_fs);
2419
2420 put_mnt_ns(ns);
2421 return new_ns;
2422}
2423
2424
2425
2426
2427
2428static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2429{
2430 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2431 if (!IS_ERR(new_ns)) {
2432 struct mount *mnt = real_mount(m);
2433 mnt->mnt_ns = new_ns;
2434 new_ns->root = mnt;
2435 list_add(&new_ns->list, &mnt->mnt_list);
2436 } else {
2437 mntput(m);
2438 }
2439 return new_ns;
2440}
2441
2442struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2443{
2444 struct mnt_namespace *ns;
2445 struct super_block *s;
2446 struct path path;
2447 int err;
2448
2449 ns = create_mnt_ns(mnt);
2450 if (IS_ERR(ns))
2451 return ERR_CAST(ns);
2452
2453 err = vfs_path_lookup(mnt->mnt_root, mnt,
2454 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2455
2456 put_mnt_ns(ns);
2457
2458 if (err)
2459 return ERR_PTR(err);
2460
2461
2462 s = path.mnt->mnt_sb;
2463 atomic_inc(&s->s_active);
2464 mntput(path.mnt);
2465
2466 down_write(&s->s_umount);
2467
2468 return path.dentry;
2469}
2470EXPORT_SYMBOL(mount_subtree);
2471
2472SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2473 char __user *, type, unsigned long, flags, void __user *, data)
2474{
2475 int ret;
2476 char *kernel_type;
2477 struct filename *kernel_dir;
2478 char *kernel_dev;
2479 unsigned long data_page;
2480
2481 ret = copy_mount_string(type, &kernel_type);
2482 if (ret < 0)
2483 goto out_type;
2484
2485 kernel_dir = getname(dir_name);
2486 if (IS_ERR(kernel_dir)) {
2487 ret = PTR_ERR(kernel_dir);
2488 goto out_dir;
2489 }
2490
2491 ret = copy_mount_string(dev_name, &kernel_dev);
2492 if (ret < 0)
2493 goto out_dev;
2494
2495 ret = copy_mount_options(data, &data_page);
2496 if (ret < 0)
2497 goto out_data;
2498
2499 ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
2500 (void *) data_page);
2501
2502 free_page(data_page);
2503out_data:
2504 kfree(kernel_dev);
2505out_dev:
2506 putname(kernel_dir);
2507out_dir:
2508 kfree(kernel_type);
2509out_type:
2510 return ret;
2511}
2512
2513
2514
2515
2516
2517
2518bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2519 const struct path *root)
2520{
2521 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
2522 dentry = mnt->mnt_mountpoint;
2523 mnt = mnt->mnt_parent;
2524 }
2525 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
2526}
2527
2528int path_is_under(struct path *path1, struct path *path2)
2529{
2530 int res;
2531 br_read_lock(&vfsmount_lock);
2532 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2533 br_read_unlock(&vfsmount_lock);
2534 return res;
2535}
2536EXPORT_SYMBOL(path_is_under);
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2564 const char __user *, put_old)
2565{
2566 struct path new, old, parent_path, root_parent, root;
2567 struct mount *new_mnt, *root_mnt;
2568 int error;
2569
2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
2571 return -EPERM;
2572
2573 error = user_path_dir(new_root, &new);
2574 if (error)
2575 goto out0;
2576
2577 error = user_path_dir(put_old, &old);
2578 if (error)
2579 goto out1;
2580
2581 error = security_sb_pivotroot(&old, &new);
2582 if (error)
2583 goto out2;
2584
2585 get_fs_root(current->fs, &root);
2586 error = lock_mount(&old);
2587 if (error)
2588 goto out3;
2589
2590 error = -EINVAL;
2591 new_mnt = real_mount(new.mnt);
2592 root_mnt = real_mount(root.mnt);
2593 if (IS_MNT_SHARED(real_mount(old.mnt)) ||
2594 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2595 IS_MNT_SHARED(root_mnt->mnt_parent))
2596 goto out4;
2597 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
2598 goto out4;
2599 error = -ENOENT;
2600 if (d_unlinked(new.dentry))
2601 goto out4;
2602 if (d_unlinked(old.dentry))
2603 goto out4;
2604 error = -EBUSY;
2605 if (new.mnt == root.mnt ||
2606 old.mnt == root.mnt)
2607 goto out4;
2608 error = -EINVAL;
2609 if (root.mnt->mnt_root != root.dentry)
2610 goto out4;
2611 if (!mnt_has_parent(root_mnt))
2612 goto out4;
2613 if (new.mnt->mnt_root != new.dentry)
2614 goto out4;
2615 if (!mnt_has_parent(new_mnt))
2616 goto out4;
2617
2618 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
2619 goto out4;
2620 br_write_lock(&vfsmount_lock);
2621 detach_mnt(new_mnt, &parent_path);
2622 detach_mnt(root_mnt, &root_parent);
2623
2624 attach_mnt(root_mnt, &old);
2625
2626 attach_mnt(new_mnt, &root_parent);
2627 touch_mnt_namespace(current->nsproxy->mnt_ns);
2628 br_write_unlock(&vfsmount_lock);
2629 chroot_fs_refs(&root, &new);
2630 error = 0;
2631out4:
2632 unlock_mount(&old);
2633 if (!error) {
2634 path_put(&root_parent);
2635 path_put(&parent_path);
2636 }
2637out3:
2638 path_put(&root);
2639out2:
2640 path_put(&old);
2641out1:
2642 path_put(&new);
2643out0:
2644 return error;
2645}
2646
2647static void __init init_mount_tree(void)
2648{
2649 struct vfsmount *mnt;
2650 struct mnt_namespace *ns;
2651 struct path root;
2652 struct file_system_type *type;
2653
2654 type = get_fs_type("rootfs");
2655 if (!type)
2656 panic("Can't find rootfs type");
2657 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
2658 put_filesystem(type);
2659 if (IS_ERR(mnt))
2660 panic("Can't create rootfs");
2661
2662 ns = create_mnt_ns(mnt);
2663 if (IS_ERR(ns))
2664 panic("Can't allocate initial namespace");
2665
2666 init_task.nsproxy->mnt_ns = ns;
2667 get_mnt_ns(ns);
2668
2669 root.mnt = mnt;
2670 root.dentry = mnt->mnt_root;
2671
2672 set_fs_pwd(current->fs, &root);
2673 set_fs_root(current->fs, &root);
2674}
2675
2676void __init mnt_init(void)
2677{
2678 unsigned u;
2679 int err;
2680
2681 init_rwsem(&namespace_sem);
2682
2683 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
2684 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2685
2686 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2687
2688 if (!mount_hashtable)
2689 panic("Failed to allocate mount hash table\n");
2690
2691 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2692
2693 for (u = 0; u < HASH_SIZE; u++)
2694 INIT_LIST_HEAD(&mount_hashtable[u]);
2695
2696 br_lock_init(&vfsmount_lock);
2697
2698 err = sysfs_init();
2699 if (err)
2700 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2701 __func__, err);
2702 fs_kobj = kobject_create_and_add("fs", NULL);
2703 if (!fs_kobj)
2704 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2705 init_rootfs();
2706 init_mount_tree();
2707}
2708
2709void put_mnt_ns(struct mnt_namespace *ns)
2710{
2711 LIST_HEAD(umount_list);
2712
2713 if (!atomic_dec_and_test(&ns->count))
2714 return;
2715 down_write(&namespace_sem);
2716 br_write_lock(&vfsmount_lock);
2717 umount_tree(ns->root, 0, &umount_list);
2718 br_write_unlock(&vfsmount_lock);
2719 up_write(&namespace_sem);
2720 release_mounts(&umount_list);
2721 free_mnt_ns(ns);
2722}
2723
2724struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2725{
2726 struct vfsmount *mnt;
2727 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
2728 if (!IS_ERR(mnt)) {
2729
2730
2731
2732
2733 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
2734 }
2735 return mnt;
2736}
2737EXPORT_SYMBOL_GPL(kern_mount_data);
2738
2739void kern_unmount(struct vfsmount *mnt)
2740{
2741
2742 if (!IS_ERR_OR_NULL(mnt)) {
2743 br_write_lock(&vfsmount_lock);
2744 real_mount(mnt)->mnt_ns = NULL;
2745 br_write_unlock(&vfsmount_lock);
2746 mntput(mnt);
2747 }
2748}
2749EXPORT_SYMBOL(kern_unmount);
2750
2751bool our_mnt(struct vfsmount *mnt)
2752{
2753 return check_mnt(real_mount(mnt));
2754}
2755
2756static void *mntns_get(struct task_struct *task)
2757{
2758 struct mnt_namespace *ns = NULL;
2759 struct nsproxy *nsproxy;
2760
2761 rcu_read_lock();
2762 nsproxy = task_nsproxy(task);
2763 if (nsproxy) {
2764 ns = nsproxy->mnt_ns;
2765 get_mnt_ns(ns);
2766 }
2767 rcu_read_unlock();
2768
2769 return ns;
2770}
2771
/* proc_ns_operations ->put: drop a reference on the mount namespace @ns. */
static void mntns_put(void *ns)
{
	struct mnt_namespace *mnt_ns = ns;

	put_mnt_ns(mnt_ns);
}
2776
2777static int mntns_install(struct nsproxy *nsproxy, void *ns)
2778{
2779 struct fs_struct *fs = current->fs;
2780 struct mnt_namespace *mnt_ns = ns;
2781 struct path root;
2782
2783 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
2784 !nsown_capable(CAP_SYS_CHROOT) ||
2785 !nsown_capable(CAP_SYS_ADMIN))
2786 return -EPERM;
2787
2788 if (fs->users != 1)
2789 return -EINVAL;
2790
2791 get_mnt_ns(mnt_ns);
2792 put_mnt_ns(nsproxy->mnt_ns);
2793 nsproxy->mnt_ns = mnt_ns;
2794
2795
2796 root.mnt = &mnt_ns->root->mnt;
2797 root.dentry = mnt_ns->root->mnt.mnt_root;
2798 path_get(&root);
2799 while(d_mountpoint(root.dentry) && follow_down_one(&root))
2800 ;
2801
2802
2803 set_fs_pwd(fs, &root);
2804 set_fs_root(fs, &root);
2805
2806 path_put(&root);
2807 return 0;
2808}
2809
2810static unsigned int mntns_inum(void *ns)
2811{
2812 struct mnt_namespace *mnt_ns = ns;
2813 return mnt_ns->proc_inum;
2814}
2815
2816const struct proc_ns_operations mntns_operations = {
2817 .name = "mnt",
2818 .type = CLONE_NEWNS,
2819 .get = mntns_get,
2820 .put = mntns_put,
2821 .install = mntns_install,
2822 .inum = mntns_inum,
2823};
2824