1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/slab.h>
13#include <linux/sched.h>
14#include <linux/smp_lock.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/acct.h>
18#include <linux/capability.h>
19#include <linux/cpumask.h>
20#include <linux/module.h>
21#include <linux/sysfs.h>
22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h>
24#include <linux/namei.h>
25#include <linux/nsproxy.h>
26#include <linux/security.h>
27#include <linux/mount.h>
28#include <linux/ramfs.h>
29#include <linux/log2.h>
30#include <linux/idr.h>
31#include <linux/fs_struct.h>
32#include <asm/uaccess.h>
33#include <asm/unistd.h>
34#include "pnode.h"
35#include "internal.h"
36
37#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
38#define HASH_SIZE (1UL << HASH_SHIFT)
39
40
41__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42
43static int event;
44static DEFINE_IDA(mnt_id_ida);
45static DEFINE_IDA(mnt_group_ida);
46static int mnt_id_start = 0;
47static int mnt_group_start = 1;
48
49static struct list_head *mount_hashtable __read_mostly;
50static struct kmem_cache *mnt_cache __read_mostly;
51static struct rw_semaphore namespace_sem;
52
53
54struct kobject *fs_kobj;
55EXPORT_SYMBOL_GPL(fs_kobj);
56
57static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
58{
59 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
60 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
61 tmp = tmp + (tmp >> HASH_SHIFT);
62 return tmp & (HASH_SIZE - 1);
63}
64
/*
 * Negative threshold (-65536) for a writer count.  The replacement list is
 * fully parenthesized so the macro expands safely inside any expression.
 * NOTE(review): not referenced in the visible part of this file - confirm
 * whether it is still needed.
 */
#define MNT_WRITER_UNDERFLOW_LIMIT (-(1 << 16))
66
67
68static int mnt_alloc_id(struct vfsmount *mnt)
69{
70 int res;
71
72retry:
73 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
74 spin_lock(&vfsmount_lock);
75 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
76 if (!res)
77 mnt_id_start = mnt->mnt_id + 1;
78 spin_unlock(&vfsmount_lock);
79 if (res == -EAGAIN)
80 goto retry;
81
82 return res;
83}
84
85static void mnt_free_id(struct vfsmount *mnt)
86{
87 int id = mnt->mnt_id;
88 spin_lock(&vfsmount_lock);
89 ida_remove(&mnt_id_ida, id);
90 if (mnt_id_start > id)
91 mnt_id_start = id;
92 spin_unlock(&vfsmount_lock);
93}
94
95
96
97
98
99
100static int mnt_alloc_group_id(struct vfsmount *mnt)
101{
102 int res;
103
104 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
105 return -ENOMEM;
106
107 res = ida_get_new_above(&mnt_group_ida,
108 mnt_group_start,
109 &mnt->mnt_group_id);
110 if (!res)
111 mnt_group_start = mnt->mnt_group_id + 1;
112
113 return res;
114}
115
116
117
118
119void mnt_release_group_id(struct vfsmount *mnt)
120{
121 int id = mnt->mnt_group_id;
122 ida_remove(&mnt_group_ida, id);
123 if (mnt_group_start > id)
124 mnt_group_start = id;
125 mnt->mnt_group_id = 0;
126}
127
128struct vfsmount *alloc_vfsmnt(const char *name)
129{
130 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
131 if (mnt) {
132 int err;
133
134 err = mnt_alloc_id(mnt);
135 if (err)
136 goto out_free_cache;
137
138 if (name) {
139 mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
140 if (!mnt->mnt_devname)
141 goto out_free_id;
142 }
143
144 atomic_set(&mnt->mnt_count, 1);
145 INIT_LIST_HEAD(&mnt->mnt_hash);
146 INIT_LIST_HEAD(&mnt->mnt_child);
147 INIT_LIST_HEAD(&mnt->mnt_mounts);
148 INIT_LIST_HEAD(&mnt->mnt_list);
149 INIT_LIST_HEAD(&mnt->mnt_expire);
150 INIT_LIST_HEAD(&mnt->mnt_share);
151 INIT_LIST_HEAD(&mnt->mnt_slave_list);
152 INIT_LIST_HEAD(&mnt->mnt_slave);
153#ifdef CONFIG_SMP
154 mnt->mnt_writers = alloc_percpu(int);
155 if (!mnt->mnt_writers)
156 goto out_free_devname;
157#else
158 mnt->mnt_writers = 0;
159#endif
160 }
161 return mnt;
162
163#ifdef CONFIG_SMP
164out_free_devname:
165 kfree(mnt->mnt_devname);
166#endif
167out_free_id:
168 mnt_free_id(mnt);
169out_free_cache:
170 kmem_cache_free(mnt_cache, mnt);
171 return NULL;
172}
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193int __mnt_is_readonly(struct vfsmount *mnt)
194{
195 if (mnt->mnt_flags & MNT_READONLY)
196 return 1;
197 if (mnt->mnt_sb->s_flags & MS_RDONLY)
198 return 1;
199 return 0;
200}
201EXPORT_SYMBOL_GPL(__mnt_is_readonly);
202
203static inline void inc_mnt_writers(struct vfsmount *mnt)
204{
205#ifdef CONFIG_SMP
206 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
207#else
208 mnt->mnt_writers++;
209#endif
210}
211
212static inline void dec_mnt_writers(struct vfsmount *mnt)
213{
214#ifdef CONFIG_SMP
215 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
216#else
217 mnt->mnt_writers--;
218#endif
219}
220
221static unsigned int count_mnt_writers(struct vfsmount *mnt)
222{
223#ifdef CONFIG_SMP
224 unsigned int count = 0;
225 int cpu;
226
227 for_each_possible_cpu(cpu) {
228 count += *per_cpu_ptr(mnt->mnt_writers, cpu);
229 }
230
231 return count;
232#else
233 return mnt->mnt_writers;
234#endif
235}
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255int mnt_want_write(struct vfsmount *mnt)
256{
257 int ret = 0;
258
259 preempt_disable();
260 inc_mnt_writers(mnt);
261
262
263
264
265
266 smp_mb();
267 while (mnt->mnt_flags & MNT_WRITE_HOLD)
268 cpu_relax();
269
270
271
272
273
274 smp_rmb();
275 if (__mnt_is_readonly(mnt)) {
276 dec_mnt_writers(mnt);
277 ret = -EROFS;
278 goto out;
279 }
280out:
281 preempt_enable();
282 return ret;
283}
284EXPORT_SYMBOL_GPL(mnt_want_write);
285
286
287
288
289
290
291
292
293
294
295
296
297
298int mnt_clone_write(struct vfsmount *mnt)
299{
300
301 if (__mnt_is_readonly(mnt))
302 return -EROFS;
303 preempt_disable();
304 inc_mnt_writers(mnt);
305 preempt_enable();
306 return 0;
307}
308EXPORT_SYMBOL_GPL(mnt_clone_write);
309
310
311
312
313
314
315
316
317int mnt_want_write_file(struct file *file)
318{
319 struct inode *inode = file->f_dentry->d_inode;
320 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
321 return mnt_want_write(file->f_path.mnt);
322 else
323 return mnt_clone_write(file->f_path.mnt);
324}
325EXPORT_SYMBOL_GPL(mnt_want_write_file);
326
327
328
329
330
331
332
333
334
/*
 * Give up write access to @mnt previously obtained with mnt_want_write()
 * or mnt_clone_write().  Preemption is disabled only around the counter
 * update so the per-cpu slot stays stable.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	dec_mnt_writers(mnt);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
342
343static int mnt_make_readonly(struct vfsmount *mnt)
344{
345 int ret = 0;
346
347 spin_lock(&vfsmount_lock);
348 mnt->mnt_flags |= MNT_WRITE_HOLD;
349
350
351
352
353 smp_mb();
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371 if (count_mnt_writers(mnt) > 0)
372 ret = -EBUSY;
373 else
374 mnt->mnt_flags |= MNT_READONLY;
375
376
377
378
379 smp_wmb();
380 mnt->mnt_flags &= ~MNT_WRITE_HOLD;
381 spin_unlock(&vfsmount_lock);
382 return ret;
383}
384
385static void __mnt_unmake_readonly(struct vfsmount *mnt)
386{
387 spin_lock(&vfsmount_lock);
388 mnt->mnt_flags &= ~MNT_READONLY;
389 spin_unlock(&vfsmount_lock);
390}
391
392void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
393{
394 mnt->mnt_sb = sb;
395 mnt->mnt_root = dget(sb->s_root);
396}
397
398EXPORT_SYMBOL(simple_set_mnt);
399
400void free_vfsmnt(struct vfsmount *mnt)
401{
402 kfree(mnt->mnt_devname);
403 mnt_free_id(mnt);
404#ifdef CONFIG_SMP
405 free_percpu(mnt->mnt_writers);
406#endif
407 kmem_cache_free(mnt_cache, mnt);
408}
409
410
411
412
413
414struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
415 int dir)
416{
417 struct list_head *head = mount_hashtable + hash(mnt, dentry);
418 struct list_head *tmp = head;
419 struct vfsmount *p, *found = NULL;
420
421 for (;;) {
422 tmp = dir ? tmp->next : tmp->prev;
423 p = NULL;
424 if (tmp == head)
425 break;
426 p = list_entry(tmp, struct vfsmount, mnt_hash);
427 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
428 found = p;
429 break;
430 }
431 }
432 return found;
433}
434
435
436
437
438
439struct vfsmount *lookup_mnt(struct path *path)
440{
441 struct vfsmount *child_mnt;
442 spin_lock(&vfsmount_lock);
443 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
444 mntget(child_mnt);
445 spin_unlock(&vfsmount_lock);
446 return child_mnt;
447}
448
449static inline int check_mnt(struct vfsmount *mnt)
450{
451 return mnt->mnt_ns == current->nsproxy->mnt_ns;
452}
453
454static void touch_mnt_namespace(struct mnt_namespace *ns)
455{
456 if (ns) {
457 ns->event = ++event;
458 wake_up_interruptible(&ns->poll);
459 }
460}
461
462static void __touch_mnt_namespace(struct mnt_namespace *ns)
463{
464 if (ns && ns->event != event) {
465 ns->event = event;
466 wake_up_interruptible(&ns->poll);
467 }
468}
469
470static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
471{
472 old_path->dentry = mnt->mnt_mountpoint;
473 old_path->mnt = mnt->mnt_parent;
474 mnt->mnt_parent = mnt;
475 mnt->mnt_mountpoint = mnt->mnt_root;
476 list_del_init(&mnt->mnt_child);
477 list_del_init(&mnt->mnt_hash);
478 old_path->dentry->d_mounted--;
479}
480
481void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
482 struct vfsmount *child_mnt)
483{
484 child_mnt->mnt_parent = mntget(mnt);
485 child_mnt->mnt_mountpoint = dget(dentry);
486 dentry->d_mounted++;
487}
488
489static void attach_mnt(struct vfsmount *mnt, struct path *path)
490{
491 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
492 list_add_tail(&mnt->mnt_hash, mount_hashtable +
493 hash(path->mnt, path->dentry));
494 list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
495}
496
497
498
499
500static void commit_tree(struct vfsmount *mnt)
501{
502 struct vfsmount *parent = mnt->mnt_parent;
503 struct vfsmount *m;
504 LIST_HEAD(head);
505 struct mnt_namespace *n = parent->mnt_ns;
506
507 BUG_ON(parent == mnt);
508
509 list_add_tail(&head, &mnt->mnt_list);
510 list_for_each_entry(m, &head, mnt_list)
511 m->mnt_ns = n;
512 list_splice(&head, n->list.prev);
513
514 list_add_tail(&mnt->mnt_hash, mount_hashtable +
515 hash(parent, mnt->mnt_mountpoint));
516 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
517 touch_mnt_namespace(n);
518}
519
520static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
521{
522 struct list_head *next = p->mnt_mounts.next;
523 if (next == &p->mnt_mounts) {
524 while (1) {
525 if (p == root)
526 return NULL;
527 next = p->mnt_child.next;
528 if (next != &p->mnt_parent->mnt_mounts)
529 break;
530 p = p->mnt_parent;
531 }
532 }
533 return list_entry(next, struct vfsmount, mnt_child);
534}
535
536static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
537{
538 struct list_head *prev = p->mnt_mounts.prev;
539 while (prev != &p->mnt_mounts) {
540 p = list_entry(prev, struct vfsmount, mnt_child);
541 prev = p->mnt_mounts.prev;
542 }
543 return p;
544}
545
546static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
547 int flag)
548{
549 struct super_block *sb = old->mnt_sb;
550 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
551
552 if (mnt) {
553 if (flag & (CL_SLAVE | CL_PRIVATE))
554 mnt->mnt_group_id = 0;
555 else
556 mnt->mnt_group_id = old->mnt_group_id;
557
558 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
559 int err = mnt_alloc_group_id(mnt);
560 if (err)
561 goto out_free;
562 }
563
564 mnt->mnt_flags = old->mnt_flags;
565 atomic_inc(&sb->s_active);
566 mnt->mnt_sb = sb;
567 mnt->mnt_root = dget(root);
568 mnt->mnt_mountpoint = mnt->mnt_root;
569 mnt->mnt_parent = mnt;
570
571 if (flag & CL_SLAVE) {
572 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
573 mnt->mnt_master = old;
574 CLEAR_MNT_SHARED(mnt);
575 } else if (!(flag & CL_PRIVATE)) {
576 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
577 list_add(&mnt->mnt_share, &old->mnt_share);
578 if (IS_MNT_SLAVE(old))
579 list_add(&mnt->mnt_slave, &old->mnt_slave);
580 mnt->mnt_master = old->mnt_master;
581 }
582 if (flag & CL_MAKE_SHARED)
583 set_mnt_shared(mnt);
584
585
586
587 if (flag & CL_EXPIRE) {
588 if (!list_empty(&old->mnt_expire))
589 list_add(&mnt->mnt_expire, &old->mnt_expire);
590 }
591 }
592 return mnt;
593
594 out_free:
595 free_vfsmnt(mnt);
596 return NULL;
597}
598
599static inline void __mntput(struct vfsmount *mnt)
600{
601 struct super_block *sb = mnt->mnt_sb;
602
603
604
605
606
607
608
609
610
611
612 WARN_ON(count_mnt_writers(mnt));
613 dput(mnt->mnt_root);
614 free_vfsmnt(mnt);
615 deactivate_super(sb);
616}
617
618void mntput_no_expire(struct vfsmount *mnt)
619{
620repeat:
621 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
622 if (likely(!mnt->mnt_pinned)) {
623 spin_unlock(&vfsmount_lock);
624 __mntput(mnt);
625 return;
626 }
627 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
628 mnt->mnt_pinned = 0;
629 spin_unlock(&vfsmount_lock);
630 acct_auto_close_mnt(mnt);
631 security_sb_umount_close(mnt);
632 goto repeat;
633 }
634}
635
636EXPORT_SYMBOL(mntput_no_expire);
637
638void mnt_pin(struct vfsmount *mnt)
639{
640 spin_lock(&vfsmount_lock);
641 mnt->mnt_pinned++;
642 spin_unlock(&vfsmount_lock);
643}
644
645EXPORT_SYMBOL(mnt_pin);
646
647void mnt_unpin(struct vfsmount *mnt)
648{
649 spin_lock(&vfsmount_lock);
650 if (mnt->mnt_pinned) {
651 atomic_inc(&mnt->mnt_count);
652 mnt->mnt_pinned--;
653 }
654 spin_unlock(&vfsmount_lock);
655}
656
657EXPORT_SYMBOL(mnt_unpin);
658
/* Emit @s into @m, escaping space, tab, newline and backslash. */
static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}
663
664
665
666
667
668
669
670int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
671{
672 const char *options;
673
674 rcu_read_lock();
675 options = rcu_dereference(mnt->mnt_sb->s_options);
676
677 if (options != NULL && options[0]) {
678 seq_putc(m, ',');
679 mangle(m, options);
680 }
681 rcu_read_unlock();
682
683 return 0;
684}
685EXPORT_SYMBOL(generic_show_options);
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700void save_mount_options(struct super_block *sb, char *options)
701{
702 BUG_ON(sb->s_options);
703 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
704}
705EXPORT_SYMBOL(save_mount_options);
706
707void replace_mount_options(struct super_block *sb, char *options)
708{
709 char *old = sb->s_options;
710 rcu_assign_pointer(sb->s_options, options);
711 if (old) {
712 synchronize_rcu();
713 kfree(old);
714 }
715}
716EXPORT_SYMBOL(replace_mount_options);
717
718#ifdef CONFIG_PROC_FS
719
720static void *m_start(struct seq_file *m, loff_t *pos)
721{
722 struct proc_mounts *p = m->private;
723
724 down_read(&namespace_sem);
725 return seq_list_start(&p->ns->list, *pos);
726}
727
728static void *m_next(struct seq_file *m, void *v, loff_t *pos)
729{
730 struct proc_mounts *p = m->private;
731
732 return seq_list_next(v, &p->ns->list, pos);
733}
734
735static void m_stop(struct seq_file *m, void *v)
736{
737 up_read(&namespace_sem);
738}
739
740int mnt_had_events(struct proc_mounts *p)
741{
742 struct mnt_namespace *ns = p->ns;
743 int res = 0;
744
745 spin_lock(&vfsmount_lock);
746 if (p->event != ns->event) {
747 p->event = ns->event;
748 res = 1;
749 }
750 spin_unlock(&vfsmount_lock);
751
752 return res;
753}
754
/*
 * One row of a flag-to-text table used when printing mount options.
 * Tables built from it are terminated by an entry whose flag is 0.
 */
struct proc_fs_info {
	int flag;		/* flag bit to test */
	const char *str;	/* text emitted when the bit is set */
};
759
760static int show_sb_opts(struct seq_file *m, struct super_block *sb)
761{
762 static const struct proc_fs_info fs_info[] = {
763 { MS_SYNCHRONOUS, ",sync" },
764 { MS_DIRSYNC, ",dirsync" },
765 { MS_MANDLOCK, ",mand" },
766 { 0, NULL }
767 };
768 const struct proc_fs_info *fs_infop;
769
770 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
771 if (sb->s_flags & fs_infop->flag)
772 seq_puts(m, fs_infop->str);
773 }
774
775 return security_sb_show_options(m, sb);
776}
777
778static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
779{
780 static const struct proc_fs_info mnt_info[] = {
781 { MNT_NOSUID, ",nosuid" },
782 { MNT_NODEV, ",nodev" },
783 { MNT_NOEXEC, ",noexec" },
784 { MNT_NOATIME, ",noatime" },
785 { MNT_NODIRATIME, ",nodiratime" },
786 { MNT_RELATIME, ",relatime" },
787 { MNT_STRICTATIME, ",strictatime" },
788 { 0, NULL }
789 };
790 const struct proc_fs_info *fs_infop;
791
792 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
793 if (mnt->mnt_flags & fs_infop->flag)
794 seq_puts(m, fs_infop->str);
795 }
796}
797
798static void show_type(struct seq_file *m, struct super_block *sb)
799{
800 mangle(m, sb->s_type->name);
801 if (sb->s_subtype && sb->s_subtype[0]) {
802 seq_putc(m, '.');
803 mangle(m, sb->s_subtype);
804 }
805}
806
807static int show_vfsmnt(struct seq_file *m, void *v)
808{
809 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
810 int err = 0;
811 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
812
813 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
814 seq_putc(m, ' ');
815 seq_path(m, &mnt_path, " \t\n\\");
816 seq_putc(m, ' ');
817 show_type(m, mnt->mnt_sb);
818 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
819 err = show_sb_opts(m, mnt->mnt_sb);
820 if (err)
821 goto out;
822 show_mnt_opts(m, mnt);
823 if (mnt->mnt_sb->s_op->show_options)
824 err = mnt->mnt_sb->s_op->show_options(m, mnt);
825 seq_puts(m, " 0 0\n");
826out:
827 return err;
828}
829
830const struct seq_operations mounts_op = {
831 .start = m_start,
832 .next = m_next,
833 .stop = m_stop,
834 .show = show_vfsmnt
835};
836
837static int show_mountinfo(struct seq_file *m, void *v)
838{
839 struct proc_mounts *p = m->private;
840 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
841 struct super_block *sb = mnt->mnt_sb;
842 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
843 struct path root = p->root;
844 int err = 0;
845
846 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
847 MAJOR(sb->s_dev), MINOR(sb->s_dev));
848 seq_dentry(m, mnt->mnt_root, " \t\n\\");
849 seq_putc(m, ' ');
850 seq_path_root(m, &mnt_path, &root, " \t\n\\");
851 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
852
853
854
855
856
857 return SEQ_SKIP;
858 }
859 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
860 show_mnt_opts(m, mnt);
861
862
863 if (IS_MNT_SHARED(mnt))
864 seq_printf(m, " shared:%i", mnt->mnt_group_id);
865 if (IS_MNT_SLAVE(mnt)) {
866 int master = mnt->mnt_master->mnt_group_id;
867 int dom = get_dominating_id(mnt, &p->root);
868 seq_printf(m, " master:%i", master);
869 if (dom && dom != master)
870 seq_printf(m, " propagate_from:%i", dom);
871 }
872 if (IS_MNT_UNBINDABLE(mnt))
873 seq_puts(m, " unbindable");
874
875
876 seq_puts(m, " - ");
877 show_type(m, sb);
878 seq_putc(m, ' ');
879 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
880 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
881 err = show_sb_opts(m, sb);
882 if (err)
883 goto out;
884 if (sb->s_op->show_options)
885 err = sb->s_op->show_options(m, mnt);
886 seq_putc(m, '\n');
887out:
888 return err;
889}
890
891const struct seq_operations mountinfo_op = {
892 .start = m_start,
893 .next = m_next,
894 .stop = m_stop,
895 .show = show_mountinfo,
896};
897
898static int show_vfsstat(struct seq_file *m, void *v)
899{
900 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
901 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
902 int err = 0;
903
904
905 if (mnt->mnt_devname) {
906 seq_puts(m, "device ");
907 mangle(m, mnt->mnt_devname);
908 } else
909 seq_puts(m, "no device");
910
911
912 seq_puts(m, " mounted on ");
913 seq_path(m, &mnt_path, " \t\n\\");
914 seq_putc(m, ' ');
915
916
917 seq_puts(m, "with fstype ");
918 show_type(m, mnt->mnt_sb);
919
920
921 if (mnt->mnt_sb->s_op->show_stats) {
922 seq_putc(m, ' ');
923 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
924 }
925
926 seq_putc(m, '\n');
927 return err;
928}
929
930const struct seq_operations mountstats_op = {
931 .start = m_start,
932 .next = m_next,
933 .stop = m_stop,
934 .show = show_vfsstat,
935};
936#endif
937
938
939
940
941
942
943
944
945
946int may_umount_tree(struct vfsmount *mnt)
947{
948 int actual_refs = 0;
949 int minimum_refs = 0;
950 struct vfsmount *p;
951
952 spin_lock(&vfsmount_lock);
953 for (p = mnt; p; p = next_mnt(p, mnt)) {
954 actual_refs += atomic_read(&p->mnt_count);
955 minimum_refs += 2;
956 }
957 spin_unlock(&vfsmount_lock);
958
959 if (actual_refs > minimum_refs)
960 return 0;
961
962 return 1;
963}
964
965EXPORT_SYMBOL(may_umount_tree);
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980int may_umount(struct vfsmount *mnt)
981{
982 int ret = 1;
983 down_read(&namespace_sem);
984 spin_lock(&vfsmount_lock);
985 if (propagate_mount_busy(mnt, 2))
986 ret = 0;
987 spin_unlock(&vfsmount_lock);
988 up_read(&namespace_sem);
989 return ret;
990}
991
992EXPORT_SYMBOL(may_umount);
993
994void release_mounts(struct list_head *head)
995{
996 struct vfsmount *mnt;
997 while (!list_empty(head)) {
998 mnt = list_first_entry(head, struct vfsmount, mnt_hash);
999 list_del_init(&mnt->mnt_hash);
1000 if (mnt->mnt_parent != mnt) {
1001 struct dentry *dentry;
1002 struct vfsmount *m;
1003 spin_lock(&vfsmount_lock);
1004 dentry = mnt->mnt_mountpoint;
1005 m = mnt->mnt_parent;
1006 mnt->mnt_mountpoint = mnt->mnt_root;
1007 mnt->mnt_parent = mnt;
1008 m->mnt_ghosts--;
1009 spin_unlock(&vfsmount_lock);
1010 dput(dentry);
1011 mntput(m);
1012 }
1013 mntput(mnt);
1014 }
1015}
1016
1017void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1018{
1019 struct vfsmount *p;
1020
1021 for (p = mnt; p; p = next_mnt(p, mnt))
1022 list_move(&p->mnt_hash, kill);
1023
1024 if (propagate)
1025 propagate_umount(kill);
1026
1027 list_for_each_entry(p, kill, mnt_hash) {
1028 list_del_init(&p->mnt_expire);
1029 list_del_init(&p->mnt_list);
1030 __touch_mnt_namespace(p->mnt_ns);
1031 p->mnt_ns = NULL;
1032 list_del_init(&p->mnt_child);
1033 if (p->mnt_parent != p) {
1034 p->mnt_parent->mnt_ghosts++;
1035 p->mnt_mountpoint->d_mounted--;
1036 }
1037 change_mnt_propagation(p, MS_PRIVATE);
1038 }
1039}
1040
1041static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
1042
1043static int do_umount(struct vfsmount *mnt, int flags)
1044{
1045 struct super_block *sb = mnt->mnt_sb;
1046 int retval;
1047 LIST_HEAD(umount_list);
1048
1049 retval = security_sb_umount(mnt, flags);
1050 if (retval)
1051 return retval;
1052
1053
1054
1055
1056
1057
1058
1059 if (flags & MNT_EXPIRE) {
1060 if (mnt == current->fs->root.mnt ||
1061 flags & (MNT_FORCE | MNT_DETACH))
1062 return -EINVAL;
1063
1064 if (atomic_read(&mnt->mnt_count) != 2)
1065 return -EBUSY;
1066
1067 if (!xchg(&mnt->mnt_expiry_mark, 1))
1068 return -EAGAIN;
1069 }
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1082 sb->s_op->umount_begin(sb);
1083 }
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094 if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1095
1096
1097
1098
1099 down_write(&sb->s_umount);
1100 if (!(sb->s_flags & MS_RDONLY))
1101 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1102 up_write(&sb->s_umount);
1103 return retval;
1104 }
1105
1106 down_write(&namespace_sem);
1107 spin_lock(&vfsmount_lock);
1108 event++;
1109
1110 if (!(flags & MNT_DETACH))
1111 shrink_submounts(mnt, &umount_list);
1112
1113 retval = -EBUSY;
1114 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1115 if (!list_empty(&mnt->mnt_list))
1116 umount_tree(mnt, 1, &umount_list);
1117 retval = 0;
1118 }
1119 spin_unlock(&vfsmount_lock);
1120 if (retval)
1121 security_sb_umount_busy(mnt);
1122 up_write(&namespace_sem);
1123 release_mounts(&umount_list);
1124 return retval;
1125}
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1136{
1137 struct path path;
1138 int retval;
1139 int lookup_flags = 0;
1140
1141 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1142 return -EINVAL;
1143
1144 if (!(flags & UMOUNT_NOFOLLOW))
1145 lookup_flags |= LOOKUP_FOLLOW;
1146
1147 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1148 if (retval)
1149 goto out;
1150 retval = -EINVAL;
1151 if (path.dentry != path.mnt->mnt_root)
1152 goto dput_and_out;
1153 if (!check_mnt(path.mnt))
1154 goto dput_and_out;
1155
1156 retval = -EPERM;
1157 if (!capable(CAP_SYS_ADMIN))
1158 goto dput_and_out;
1159
1160 retval = do_umount(path.mnt, flags);
1161dput_and_out:
1162
1163 dput(path.dentry);
1164 mntput_no_expire(path.mnt);
1165out:
1166 return retval;
1167}
1168
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/* Legacy one-argument umount: identical to umount(name, 0). */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif
1180
1181static int mount_is_safe(struct path *path)
1182{
1183 if (capable(CAP_SYS_ADMIN))
1184 return 0;
1185 return -EPERM;
1186#ifdef notyet
1187 if (S_ISLNK(path->dentry->d_inode->i_mode))
1188 return -EPERM;
1189 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1190 if (current_uid() != path->dentry->d_inode->i_uid)
1191 return -EPERM;
1192 }
1193 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1194 return -EPERM;
1195 return 0;
1196#endif
1197}
1198
1199struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1200 int flag)
1201{
1202 struct vfsmount *res, *p, *q, *r, *s;
1203 struct path path;
1204
1205 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1206 return NULL;
1207
1208 res = q = clone_mnt(mnt, dentry, flag);
1209 if (!q)
1210 goto Enomem;
1211 q->mnt_mountpoint = mnt->mnt_mountpoint;
1212
1213 p = mnt;
1214 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1215 if (!is_subdir(r->mnt_mountpoint, dentry))
1216 continue;
1217
1218 for (s = r; s; s = next_mnt(s, r)) {
1219 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
1220 s = skip_mnt_tree(s);
1221 continue;
1222 }
1223 while (p != s->mnt_parent) {
1224 p = p->mnt_parent;
1225 q = q->mnt_parent;
1226 }
1227 p = s;
1228 path.mnt = q;
1229 path.dentry = p->mnt_mountpoint;
1230 q = clone_mnt(p, p->mnt_root, flag);
1231 if (!q)
1232 goto Enomem;
1233 spin_lock(&vfsmount_lock);
1234 list_add_tail(&q->mnt_list, &res->mnt_list);
1235 attach_mnt(q, &path);
1236 spin_unlock(&vfsmount_lock);
1237 }
1238 }
1239 return res;
1240Enomem:
1241 if (res) {
1242 LIST_HEAD(umount_list);
1243 spin_lock(&vfsmount_lock);
1244 umount_tree(res, 0, &umount_list);
1245 spin_unlock(&vfsmount_lock);
1246 release_mounts(&umount_list);
1247 }
1248 return NULL;
1249}
1250
1251struct vfsmount *collect_mounts(struct path *path)
1252{
1253 struct vfsmount *tree;
1254 down_write(&namespace_sem);
1255 tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
1256 up_write(&namespace_sem);
1257 return tree;
1258}
1259
1260void drop_collected_mounts(struct vfsmount *mnt)
1261{
1262 LIST_HEAD(umount_list);
1263 down_write(&namespace_sem);
1264 spin_lock(&vfsmount_lock);
1265 umount_tree(mnt, 0, &umount_list);
1266 spin_unlock(&vfsmount_lock);
1267 up_write(&namespace_sem);
1268 release_mounts(&umount_list);
1269}
1270
1271int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1272 struct vfsmount *root)
1273{
1274 struct vfsmount *mnt;
1275 int res = f(root, arg);
1276 if (res)
1277 return res;
1278 list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
1279 res = f(mnt, arg);
1280 if (res)
1281 return res;
1282 }
1283 return 0;
1284}
1285
1286static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1287{
1288 struct vfsmount *p;
1289
1290 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1291 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1292 mnt_release_group_id(p);
1293 }
1294}
1295
1296static int invent_group_ids(struct vfsmount *mnt, bool recurse)
1297{
1298 struct vfsmount *p;
1299
1300 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1301 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1302 int err = mnt_alloc_group_id(p);
1303 if (err) {
1304 cleanup_group_ids(mnt, p);
1305 return err;
1306 }
1307 }
1308 }
1309
1310 return 0;
1311}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376static int attach_recursive_mnt(struct vfsmount *source_mnt,
1377 struct path *path, struct path *parent_path)
1378{
1379 LIST_HEAD(tree_list);
1380 struct vfsmount *dest_mnt = path->mnt;
1381 struct dentry *dest_dentry = path->dentry;
1382 struct vfsmount *child, *p;
1383 int err;
1384
1385 if (IS_MNT_SHARED(dest_mnt)) {
1386 err = invent_group_ids(source_mnt, true);
1387 if (err)
1388 goto out;
1389 }
1390 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
1391 if (err)
1392 goto out_cleanup_ids;
1393
1394 spin_lock(&vfsmount_lock);
1395
1396 if (IS_MNT_SHARED(dest_mnt)) {
1397 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1398 set_mnt_shared(p);
1399 }
1400 if (parent_path) {
1401 detach_mnt(source_mnt, parent_path);
1402 attach_mnt(source_mnt, path);
1403 touch_mnt_namespace(parent_path->mnt->mnt_ns);
1404 } else {
1405 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1406 commit_tree(source_mnt);
1407 }
1408
1409 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
1410 list_del_init(&child->mnt_hash);
1411 commit_tree(child);
1412 }
1413 spin_unlock(&vfsmount_lock);
1414 return 0;
1415
1416 out_cleanup_ids:
1417 if (IS_MNT_SHARED(dest_mnt))
1418 cleanup_group_ids(source_mnt, NULL);
1419 out:
1420 return err;
1421}
1422
1423static int graft_tree(struct vfsmount *mnt, struct path *path)
1424{
1425 int err;
1426 if (mnt->mnt_sb->s_flags & MS_NOUSER)
1427 return -EINVAL;
1428
1429 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1430 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
1431 return -ENOTDIR;
1432
1433 err = -ENOENT;
1434 mutex_lock(&path->dentry->d_inode->i_mutex);
1435 if (cant_mount(path->dentry))
1436 goto out_unlock;
1437
1438 err = security_sb_check_sb(mnt, path);
1439 if (err)
1440 goto out_unlock;
1441
1442 err = -ENOENT;
1443 if (!d_unlinked(path->dentry))
1444 err = attach_recursive_mnt(mnt, path, NULL);
1445out_unlock:
1446 mutex_unlock(&path->dentry->d_inode->i_mutex);
1447 if (!err)
1448 security_sb_post_addmount(mnt, path);
1449 return err;
1450}
1451
1452
1453
1454
1455static int do_change_type(struct path *path, int flag)
1456{
1457 struct vfsmount *m, *mnt = path->mnt;
1458 int recurse = flag & MS_REC;
1459 int type = flag & ~MS_REC;
1460 int err = 0;
1461
1462 if (!capable(CAP_SYS_ADMIN))
1463 return -EPERM;
1464
1465 if (path->dentry != path->mnt->mnt_root)
1466 return -EINVAL;
1467
1468 down_write(&namespace_sem);
1469 if (type == MS_SHARED) {
1470 err = invent_group_ids(mnt, recurse);
1471 if (err)
1472 goto out_unlock;
1473 }
1474
1475 spin_lock(&vfsmount_lock);
1476 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1477 change_mnt_propagation(m, type);
1478 spin_unlock(&vfsmount_lock);
1479
1480 out_unlock:
1481 up_write(&namespace_sem);
1482 return err;
1483}
1484
1485
1486
1487
1488static int do_loopback(struct path *path, char *old_name,
1489 int recurse)
1490{
1491 struct path old_path;
1492 struct vfsmount *mnt = NULL;
1493 int err = mount_is_safe(path);
1494 if (err)
1495 return err;
1496 if (!old_name || !*old_name)
1497 return -EINVAL;
1498 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1499 if (err)
1500 return err;
1501
1502 down_write(&namespace_sem);
1503 err = -EINVAL;
1504 if (IS_MNT_UNBINDABLE(old_path.mnt))
1505 goto out;
1506
1507 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1508 goto out;
1509
1510 err = -ENOMEM;
1511 if (recurse)
1512 mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
1513 else
1514 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
1515
1516 if (!mnt)
1517 goto out;
1518
1519 err = graft_tree(mnt, path);
1520 if (err) {
1521 LIST_HEAD(umount_list);
1522 spin_lock(&vfsmount_lock);
1523 umount_tree(mnt, 0, &umount_list);
1524 spin_unlock(&vfsmount_lock);
1525 release_mounts(&umount_list);
1526 }
1527
1528out:
1529 up_write(&namespace_sem);
1530 path_put(&old_path);
1531 return err;
1532}
1533
1534static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1535{
1536 int error = 0;
1537 int readonly_request = 0;
1538
1539 if (ms_flags & MS_RDONLY)
1540 readonly_request = 1;
1541 if (readonly_request == __mnt_is_readonly(mnt))
1542 return 0;
1543
1544 if (readonly_request)
1545 error = mnt_make_readonly(mnt);
1546 else
1547 __mnt_unmake_readonly(mnt);
1548 return error;
1549}
1550
1551
1552
1553
1554
1555
1556static int do_remount(struct path *path, int flags, int mnt_flags,
1557 void *data)
1558{
1559 int err;
1560 struct super_block *sb = path->mnt->mnt_sb;
1561
1562 if (!capable(CAP_SYS_ADMIN))
1563 return -EPERM;
1564
1565 if (!check_mnt(path->mnt))
1566 return -EINVAL;
1567
1568 if (path->dentry != path->mnt->mnt_root)
1569 return -EINVAL;
1570
1571 down_write(&sb->s_umount);
1572 if (flags & MS_BIND)
1573 err = change_mount_flags(path->mnt, flags);
1574 else
1575 err = do_remount_sb(sb, flags, data, 0);
1576 if (!err) {
1577 spin_lock(&vfsmount_lock);
1578 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1579 path->mnt->mnt_flags = mnt_flags;
1580 spin_unlock(&vfsmount_lock);
1581 }
1582 up_write(&sb->s_umount);
1583 if (!err) {
1584 security_sb_post_remount(path->mnt, flags, data);
1585
1586 spin_lock(&vfsmount_lock);
1587 touch_mnt_namespace(path->mnt->mnt_ns);
1588 spin_unlock(&vfsmount_lock);
1589 }
1590 return err;
1591}
1592
/*
 * Return 1 if @mnt or any mount reached from it via next_mnt() is marked
 * unbindable, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct vfsmount *mnt)
{
	struct vfsmount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
1602
1603static int do_move_mount(struct path *path, char *old_name)
1604{
1605 struct path old_path, parent_path;
1606 struct vfsmount *p;
1607 int err = 0;
1608 if (!capable(CAP_SYS_ADMIN))
1609 return -EPERM;
1610 if (!old_name || !*old_name)
1611 return -EINVAL;
1612 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1613 if (err)
1614 return err;
1615
1616 down_write(&namespace_sem);
1617 while (d_mountpoint(path->dentry) &&
1618 follow_down(path))
1619 ;
1620 err = -EINVAL;
1621 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1622 goto out;
1623
1624 err = -ENOENT;
1625 mutex_lock(&path->dentry->d_inode->i_mutex);
1626 if (cant_mount(path->dentry))
1627 goto out1;
1628
1629 if (d_unlinked(path->dentry))
1630 goto out1;
1631
1632 err = -EINVAL;
1633 if (old_path.dentry != old_path.mnt->mnt_root)
1634 goto out1;
1635
1636 if (old_path.mnt == old_path.mnt->mnt_parent)
1637 goto out1;
1638
1639 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1640 S_ISDIR(old_path.dentry->d_inode->i_mode))
1641 goto out1;
1642
1643
1644
1645 if (old_path.mnt->mnt_parent &&
1646 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1647 goto out1;
1648
1649
1650
1651
1652 if (IS_MNT_SHARED(path->mnt) &&
1653 tree_contains_unbindable(old_path.mnt))
1654 goto out1;
1655 err = -ELOOP;
1656 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
1657 if (p == old_path.mnt)
1658 goto out1;
1659
1660 err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
1661 if (err)
1662 goto out1;
1663
1664
1665
1666 list_del_init(&old_path.mnt->mnt_expire);
1667out1:
1668 mutex_unlock(&path->dentry->d_inode->i_mutex);
1669out:
1670 up_write(&namespace_sem);
1671 if (!err)
1672 path_put(&parent_path);
1673 path_put(&old_path);
1674 return err;
1675}
1676
1677
1678
1679
1680
1681static int do_new_mount(struct path *path, char *type, int flags,
1682 int mnt_flags, char *name, void *data)
1683{
1684 struct vfsmount *mnt;
1685
1686 if (!type)
1687 return -EINVAL;
1688
1689
1690 if (!capable(CAP_SYS_ADMIN))
1691 return -EPERM;
1692
1693 lock_kernel();
1694 mnt = do_kern_mount(type, flags, name, data);
1695 unlock_kernel();
1696 if (IS_ERR(mnt))
1697 return PTR_ERR(mnt);
1698
1699 return do_add_mount(mnt, path, mnt_flags, NULL);
1700}
1701
1702
1703
1704
1705
1706int do_add_mount(struct vfsmount *newmnt, struct path *path,
1707 int mnt_flags, struct list_head *fslist)
1708{
1709 int err;
1710
1711 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1712
1713 down_write(&namespace_sem);
1714
1715 while (d_mountpoint(path->dentry) &&
1716 follow_down(path))
1717 ;
1718 err = -EINVAL;
1719 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
1720 goto unlock;
1721
1722
1723 err = -EBUSY;
1724 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1725 path->mnt->mnt_root == path->dentry)
1726 goto unlock;
1727
1728 err = -EINVAL;
1729 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
1730 goto unlock;
1731
1732 newmnt->mnt_flags = mnt_flags;
1733 if ((err = graft_tree(newmnt, path)))
1734 goto unlock;
1735
1736 if (fslist)
1737 list_add_tail(&newmnt->mnt_expire, fslist);
1738
1739 up_write(&namespace_sem);
1740 return 0;
1741
1742unlock:
1743 up_write(&namespace_sem);
1744 mntput(newmnt);
1745 return err;
1746}
1747
1748EXPORT_SYMBOL_GPL(do_add_mount);
1749
1750
1751
1752
1753
1754
1755void mark_mounts_for_expiry(struct list_head *mounts)
1756{
1757 struct vfsmount *mnt, *next;
1758 LIST_HEAD(graveyard);
1759 LIST_HEAD(umounts);
1760
1761 if (list_empty(mounts))
1762 return;
1763
1764 down_write(&namespace_sem);
1765 spin_lock(&vfsmount_lock);
1766
1767
1768
1769
1770
1771
1772
1773 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1774 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
1775 propagate_mount_busy(mnt, 1))
1776 continue;
1777 list_move(&mnt->mnt_expire, &graveyard);
1778 }
1779 while (!list_empty(&graveyard)) {
1780 mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
1781 touch_mnt_namespace(mnt->mnt_ns);
1782 umount_tree(mnt, 1, &umounts);
1783 }
1784 spin_unlock(&vfsmount_lock);
1785 up_write(&namespace_sem);
1786
1787 release_mounts(&umounts);
1788}
1789
1790EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
1791
1792
1793
1794
1795
1796
1797
1798static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
1799{
1800 struct vfsmount *this_parent = parent;
1801 struct list_head *next;
1802 int found = 0;
1803
1804repeat:
1805 next = this_parent->mnt_mounts.next;
1806resume:
1807 while (next != &this_parent->mnt_mounts) {
1808 struct list_head *tmp = next;
1809 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);
1810
1811 next = tmp->next;
1812 if (!(mnt->mnt_flags & MNT_SHRINKABLE))
1813 continue;
1814
1815
1816
1817 if (!list_empty(&mnt->mnt_mounts)) {
1818 this_parent = mnt;
1819 goto repeat;
1820 }
1821
1822 if (!propagate_mount_busy(mnt, 1)) {
1823 list_move_tail(&mnt->mnt_expire, graveyard);
1824 found++;
1825 }
1826 }
1827
1828
1829
1830 if (this_parent != parent) {
1831 next = this_parent->mnt_child.next;
1832 this_parent = this_parent->mnt_parent;
1833 goto resume;
1834 }
1835 return found;
1836}
1837
1838
1839
1840
1841
1842static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1843{
1844 LIST_HEAD(graveyard);
1845 struct vfsmount *m;
1846
1847
1848 while (select_submounts(mnt, &graveyard)) {
1849 while (!list_empty(&graveyard)) {
1850 m = list_first_entry(&graveyard, struct vfsmount,
1851 mnt_expire);
1852 touch_mnt_namespace(m->mnt_ns);
1853 umount_tree(m, 1, umounts);
1854 }
1855 }
1856}
1857
1858
1859
1860
1861
1862
1863
1864static long exact_copy_from_user(void *to, const void __user * from,
1865 unsigned long n)
1866{
1867 char *t = to;
1868 const char __user *f = from;
1869 char c;
1870
1871 if (!access_ok(VERIFY_READ, from, n))
1872 return n;
1873
1874 while (n) {
1875 if (__get_user(c, f)) {
1876 memset(t, 0, n);
1877 break;
1878 }
1879 *t++ = c;
1880 f++;
1881 n--;
1882 }
1883 return n;
1884}
1885
1886int copy_mount_options(const void __user * data, unsigned long *where)
1887{
1888 int i;
1889 unsigned long page;
1890 unsigned long size;
1891
1892 *where = 0;
1893 if (!data)
1894 return 0;
1895
1896 if (!(page = __get_free_page(GFP_KERNEL)))
1897 return -ENOMEM;
1898
1899
1900
1901
1902
1903
1904 size = TASK_SIZE - (unsigned long)data;
1905 if (size > PAGE_SIZE)
1906 size = PAGE_SIZE;
1907
1908 i = size - exact_copy_from_user((void *)page, data, size);
1909 if (!i) {
1910 free_page(page);
1911 return -EFAULT;
1912 }
1913 if (i != PAGE_SIZE)
1914 memset((char *)page + i, 0, PAGE_SIZE - i);
1915 *where = page;
1916 return 0;
1917}
1918
1919int copy_mount_string(const void __user *data, char **where)
1920{
1921 char *tmp;
1922
1923 if (!data) {
1924 *where = NULL;
1925 return 0;
1926 }
1927
1928 tmp = strndup_user(data, PAGE_SIZE);
1929 if (IS_ERR(tmp))
1930 return PTR_ERR(tmp);
1931
1932 *where = tmp;
1933 return 0;
1934}
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950long do_mount(char *dev_name, char *dir_name, char *type_page,
1951 unsigned long flags, void *data_page)
1952{
1953 struct path path;
1954 int retval = 0;
1955 int mnt_flags = 0;
1956
1957
1958 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
1959 flags &= ~MS_MGC_MSK;
1960
1961
1962
1963 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1964 return -EINVAL;
1965
1966 if (data_page)
1967 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1968
1969
1970 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1971 if (retval)
1972 return retval;
1973
1974 retval = security_sb_mount(dev_name, &path,
1975 type_page, flags, data_page);
1976 if (retval)
1977 goto dput_out;
1978
1979
1980 if (!(flags & MS_NOATIME))
1981 mnt_flags |= MNT_RELATIME;
1982
1983
1984 if (flags & MS_NOSUID)
1985 mnt_flags |= MNT_NOSUID;
1986 if (flags & MS_NODEV)
1987 mnt_flags |= MNT_NODEV;
1988 if (flags & MS_NOEXEC)
1989 mnt_flags |= MNT_NOEXEC;
1990 if (flags & MS_NOATIME)
1991 mnt_flags |= MNT_NOATIME;
1992 if (flags & MS_NODIRATIME)
1993 mnt_flags |= MNT_NODIRATIME;
1994 if (flags & MS_STRICTATIME)
1995 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
1996 if (flags & MS_RDONLY)
1997 mnt_flags |= MNT_READONLY;
1998
1999 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
2000 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
2001 MS_STRICTATIME);
2002
2003 if (flags & MS_REMOUNT)
2004 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
2005 data_page);
2006 else if (flags & MS_BIND)
2007 retval = do_loopback(&path, dev_name, flags & MS_REC);
2008 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2009 retval = do_change_type(&path, flags);
2010 else if (flags & MS_MOVE)
2011 retval = do_move_mount(&path, dev_name);
2012 else
2013 retval = do_new_mount(&path, type_page, flags, mnt_flags,
2014 dev_name, data_page);
2015dput_out:
2016 path_put(&path);
2017 return retval;
2018}
2019
2020static struct mnt_namespace *alloc_mnt_ns(void)
2021{
2022 struct mnt_namespace *new_ns;
2023
2024 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2025 if (!new_ns)
2026 return ERR_PTR(-ENOMEM);
2027 atomic_set(&new_ns->count, 1);
2028 new_ns->root = NULL;
2029 INIT_LIST_HEAD(&new_ns->list);
2030 init_waitqueue_head(&new_ns->poll);
2031 new_ns->event = 0;
2032 return new_ns;
2033}
2034
2035
2036
2037
2038
2039static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2040 struct fs_struct *fs)
2041{
2042 struct mnt_namespace *new_ns;
2043 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2044 struct vfsmount *p, *q;
2045
2046 new_ns = alloc_mnt_ns();
2047 if (IS_ERR(new_ns))
2048 return new_ns;
2049
2050 down_write(&namespace_sem);
2051
2052 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
2053 CL_COPY_ALL | CL_EXPIRE);
2054 if (!new_ns->root) {
2055 up_write(&namespace_sem);
2056 kfree(new_ns);
2057 return ERR_PTR(-ENOMEM);
2058 }
2059 spin_lock(&vfsmount_lock);
2060 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2061 spin_unlock(&vfsmount_lock);
2062
2063
2064
2065
2066
2067
2068 p = mnt_ns->root;
2069 q = new_ns->root;
2070 while (p) {
2071 q->mnt_ns = new_ns;
2072 if (fs) {
2073 if (p == fs->root.mnt) {
2074 rootmnt = p;
2075 fs->root.mnt = mntget(q);
2076 }
2077 if (p == fs->pwd.mnt) {
2078 pwdmnt = p;
2079 fs->pwd.mnt = mntget(q);
2080 }
2081 }
2082 p = next_mnt(p, mnt_ns->root);
2083 q = next_mnt(q, new_ns->root);
2084 }
2085 up_write(&namespace_sem);
2086
2087 if (rootmnt)
2088 mntput(rootmnt);
2089 if (pwdmnt)
2090 mntput(pwdmnt);
2091
2092 return new_ns;
2093}
2094
2095struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2096 struct fs_struct *new_fs)
2097{
2098 struct mnt_namespace *new_ns;
2099
2100 BUG_ON(!ns);
2101 get_mnt_ns(ns);
2102
2103 if (!(flags & CLONE_NEWNS))
2104 return ns;
2105
2106 new_ns = dup_mnt_ns(ns, new_fs);
2107
2108 put_mnt_ns(ns);
2109 return new_ns;
2110}
2111
2112
2113
2114
2115
2116struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
2117{
2118 struct mnt_namespace *new_ns;
2119
2120 new_ns = alloc_mnt_ns();
2121 if (!IS_ERR(new_ns)) {
2122 mnt->mnt_ns = new_ns;
2123 new_ns->root = mnt;
2124 list_add(&new_ns->list, &new_ns->root->mnt_list);
2125 }
2126 return new_ns;
2127}
2128EXPORT_SYMBOL(create_mnt_ns);
2129
2130SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2131 char __user *, type, unsigned long, flags, void __user *, data)
2132{
2133 int ret;
2134 char *kernel_type;
2135 char *kernel_dir;
2136 char *kernel_dev;
2137 unsigned long data_page;
2138
2139 ret = copy_mount_string(type, &kernel_type);
2140 if (ret < 0)
2141 goto out_type;
2142
2143 kernel_dir = getname(dir_name);
2144 if (IS_ERR(kernel_dir)) {
2145 ret = PTR_ERR(kernel_dir);
2146 goto out_dir;
2147 }
2148
2149 ret = copy_mount_string(dev_name, &kernel_dev);
2150 if (ret < 0)
2151 goto out_dev;
2152
2153 ret = copy_mount_options(data, &data_page);
2154 if (ret < 0)
2155 goto out_data;
2156
2157 ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
2158 (void *) data_page);
2159
2160 free_page(data_page);
2161out_data:
2162 kfree(kernel_dev);
2163out_dev:
2164 putname(kernel_dir);
2165out_dir:
2166 kfree(kernel_type);
2167out_type:
2168 return ret;
2169}
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2197 const char __user *, put_old)
2198{
2199 struct vfsmount *tmp;
2200 struct path new, old, parent_path, root_parent, root;
2201 int error;
2202
2203 if (!capable(CAP_SYS_ADMIN))
2204 return -EPERM;
2205
2206 error = user_path_dir(new_root, &new);
2207 if (error)
2208 goto out0;
2209 error = -EINVAL;
2210 if (!check_mnt(new.mnt))
2211 goto out1;
2212
2213 error = user_path_dir(put_old, &old);
2214 if (error)
2215 goto out1;
2216
2217 error = security_sb_pivotroot(&old, &new);
2218 if (error) {
2219 path_put(&old);
2220 goto out1;
2221 }
2222
2223 read_lock(¤t->fs->lock);
2224 root = current->fs->root;
2225 path_get(¤t->fs->root);
2226 read_unlock(¤t->fs->lock);
2227 down_write(&namespace_sem);
2228 mutex_lock(&old.dentry->d_inode->i_mutex);
2229 error = -EINVAL;
2230 if (IS_MNT_SHARED(old.mnt) ||
2231 IS_MNT_SHARED(new.mnt->mnt_parent) ||
2232 IS_MNT_SHARED(root.mnt->mnt_parent))
2233 goto out2;
2234 if (!check_mnt(root.mnt))
2235 goto out2;
2236 error = -ENOENT;
2237 if (cant_mount(old.dentry))
2238 goto out2;
2239 if (d_unlinked(new.dentry))
2240 goto out2;
2241 if (d_unlinked(old.dentry))
2242 goto out2;
2243 error = -EBUSY;
2244 if (new.mnt == root.mnt ||
2245 old.mnt == root.mnt)
2246 goto out2;
2247 error = -EINVAL;
2248 if (root.mnt->mnt_root != root.dentry)
2249 goto out2;
2250 if (root.mnt->mnt_parent == root.mnt)
2251 goto out2;
2252 if (new.mnt->mnt_root != new.dentry)
2253 goto out2;
2254 if (new.mnt->mnt_parent == new.mnt)
2255 goto out2;
2256
2257 tmp = old.mnt;
2258 spin_lock(&vfsmount_lock);
2259 if (tmp != new.mnt) {
2260 for (;;) {
2261 if (tmp->mnt_parent == tmp)
2262 goto out3;
2263 if (tmp->mnt_parent == new.mnt)
2264 break;
2265 tmp = tmp->mnt_parent;
2266 }
2267 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2268 goto out3;
2269 } else if (!is_subdir(old.dentry, new.dentry))
2270 goto out3;
2271 detach_mnt(new.mnt, &parent_path);
2272 detach_mnt(root.mnt, &root_parent);
2273
2274 attach_mnt(root.mnt, &old);
2275
2276 attach_mnt(new.mnt, &root_parent);
2277 touch_mnt_namespace(current->nsproxy->mnt_ns);
2278 spin_unlock(&vfsmount_lock);
2279 chroot_fs_refs(&root, &new);
2280 security_sb_post_pivotroot(&root, &new);
2281 error = 0;
2282 path_put(&root_parent);
2283 path_put(&parent_path);
2284out2:
2285 mutex_unlock(&old.dentry->d_inode->i_mutex);
2286 up_write(&namespace_sem);
2287 path_put(&root);
2288 path_put(&old);
2289out1:
2290 path_put(&new);
2291out0:
2292 return error;
2293out3:
2294 spin_unlock(&vfsmount_lock);
2295 goto out2;
2296}
2297
2298static void __init init_mount_tree(void)
2299{
2300 struct vfsmount *mnt;
2301 struct mnt_namespace *ns;
2302 struct path root;
2303
2304 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2305 if (IS_ERR(mnt))
2306 panic("Can't create rootfs");
2307 ns = create_mnt_ns(mnt);
2308 if (IS_ERR(ns))
2309 panic("Can't allocate initial namespace");
2310
2311 init_task.nsproxy->mnt_ns = ns;
2312 get_mnt_ns(ns);
2313
2314 root.mnt = ns->root;
2315 root.dentry = ns->root->mnt_root;
2316
2317 set_fs_pwd(current->fs, &root);
2318 set_fs_root(current->fs, &root);
2319}
2320
2321void __init mnt_init(void)
2322{
2323 unsigned u;
2324 int err;
2325
2326 init_rwsem(&namespace_sem);
2327
2328 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
2329 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2330
2331 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2332
2333 if (!mount_hashtable)
2334 panic("Failed to allocate mount hash table\n");
2335
2336 printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);
2337
2338 for (u = 0; u < HASH_SIZE; u++)
2339 INIT_LIST_HEAD(&mount_hashtable[u]);
2340
2341 err = sysfs_init();
2342 if (err)
2343 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2344 __func__, err);
2345 fs_kobj = kobject_create_and_add("fs", NULL);
2346 if (!fs_kobj)
2347 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2348 init_rootfs();
2349 init_mount_tree();
2350}
2351
2352void put_mnt_ns(struct mnt_namespace *ns)
2353{
2354 LIST_HEAD(umount_list);
2355
2356 if (!atomic_dec_and_test(&ns->count))
2357 return;
2358 down_write(&namespace_sem);
2359 spin_lock(&vfsmount_lock);
2360 umount_tree(ns->root, 0, &umount_list);
2361 spin_unlock(&vfsmount_lock);
2362 up_write(&namespace_sem);
2363 release_mounts(&umount_list);
2364 kfree(ns);
2365}
2366EXPORT_SYMBOL(put_mnt_ns);
2367