/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

11#include <linux/syscalls.h>
12#include <linux/slab.h>
13#include <linux/sched.h>
14#include <linux/smp_lock.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/acct.h>
18#include <linux/capability.h>
19#include <linux/cpumask.h>
20#include <linux/module.h>
21#include <linux/sysfs.h>
22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h>
24#include <linux/namei.h>
25#include <linux/security.h>
26#include <linux/mount.h>
27#include <linux/ramfs.h>
28#include <linux/log2.h>
29#include <linux/idr.h>
30#include <linux/fs_struct.h>
31#include <asm/uaccess.h>
32#include <asm/unistd.h>
33#include "pnode.h"
34#include "internal.h"
35
36#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
37#define HASH_SIZE (1UL << HASH_SHIFT)
38
39
40__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
41
42static int event;
43static DEFINE_IDA(mnt_id_ida);
44static DEFINE_IDA(mnt_group_ida);
45
46static struct list_head *mount_hashtable __read_mostly;
47static struct kmem_cache *mnt_cache __read_mostly;
48static struct rw_semaphore namespace_sem;
49
50
51struct kobject *fs_kobj;
52EXPORT_SYMBOL_GPL(fs_kobj);
53
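/*
 * Hash a (parent mount, mountpoint dentry) pair into the mount hash
 * table.  Both pointers are cache-line aligned, so divide out the
 * alignment before folding them together.
 */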
54static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
55{
56 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
57 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
58 tmp = tmp + (tmp >> HASH_SHIFT);
59 return tmp & (HASH_SIZE - 1);
60}
61
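/*
 * How far __mnt_writers may drift below zero (writes dropped on a
 * different cpu than they were taken on) before we coalesce the
 * per-cpu counts and check for a genuine imbalance.
 */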
62#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
63
64
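/*
 * Allocate a unique mount ID.  ida_pre_get() may sleep, so it is done
 * outside vfsmount_lock, and the allocation is retried if the IDA ran
 * out of preallocated memory.
 */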
65static int mnt_alloc_id(struct vfsmount *mnt)
66{
67 int res;
68
69retry:
70 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
71 spin_lock(&vfsmount_lock);
72 res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
73 spin_unlock(&vfsmount_lock);
74 if (res == -EAGAIN)
75 goto retry;
76
77 return res;
78}
79
80static void mnt_free_id(struct vfsmount *mnt)
81{
82 spin_lock(&vfsmount_lock);
83 ida_remove(&mnt_id_ida, mnt->mnt_id);
84 spin_unlock(&vfsmount_lock);
85}
86
/*
 * Allocate a new peer group ID.
 *
 * mnt_group_ida is protected by namespace_sem.
 */
92static int mnt_alloc_group_id(struct vfsmount *mnt)
93{
94 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
95 return -ENOMEM;
96
97 return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
98}
99
/*
 * Release a peer group ID.
 */
103void mnt_release_group_id(struct vfsmount *mnt)
104{
105 ida_remove(&mnt_group_ida, mnt->mnt_group_id);
106 mnt->mnt_group_id = 0;
107}
108
109struct vfsmount *alloc_vfsmnt(const char *name)
110{
111 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
112 if (mnt) {
113 int err;
114
115 err = mnt_alloc_id(mnt);
116 if (err)
117 goto out_free_cache;
118
119 if (name) {
120 mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
121 if (!mnt->mnt_devname)
122 goto out_free_id;
123 }
124
125 atomic_set(&mnt->mnt_count, 1);
126 INIT_LIST_HEAD(&mnt->mnt_hash);
127 INIT_LIST_HEAD(&mnt->mnt_child);
128 INIT_LIST_HEAD(&mnt->mnt_mounts);
129 INIT_LIST_HEAD(&mnt->mnt_list);
130 INIT_LIST_HEAD(&mnt->mnt_expire);
131 INIT_LIST_HEAD(&mnt->mnt_share);
132 INIT_LIST_HEAD(&mnt->mnt_slave_list);
133 INIT_LIST_HEAD(&mnt->mnt_slave);
134 atomic_set(&mnt->__mnt_writers, 0);
135 }
136 return mnt;
137
138out_free_id:
139 mnt_free_id(mnt);
140out_free_cache:
141 kmem_cache_free(mnt_cache, mnt);
142 return NULL;
143}
144
/*
 * Most r/o checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track of
 * when those operations start (for permission checks) and when they
 * end, so that we can determine when writes are able to occur to a
 * filesystem.
 */
/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be used directly ala "return __mnt_is_readonly(mnt)"
 * checks; instead, it must be called by the get_write_access path to
 * check the write status of the file system.
 */
164int __mnt_is_readonly(struct vfsmount *mnt)
165{
166 if (mnt->mnt_flags & MNT_READONLY)
167 return 1;
168 if (mnt->mnt_sb->s_flags & MS_RDONLY)
169 return 1;
170 return 0;
171}
172EXPORT_SYMBOL_GPL(__mnt_is_readonly);
173
174struct mnt_writer {
	/*
	 * If holding multiple instances of this lock, they
	 * must be ordered by cpu number.
	 */
179 spinlock_t lock;
180 struct lock_class_key lock_class;
181 unsigned long count;
182 struct vfsmount *mnt;
183} ____cacheline_aligned_in_smp;
184static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
185
186static int __init init_mnt_writers(void)
187{
188 int cpu;
189 for_each_possible_cpu(cpu) {
190 struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
191 spin_lock_init(&writer->lock);
192 lockdep_set_class(&writer->lock, &writer->lock_class);
193 writer->count = 0;
194 }
195 return 0;
196}
197fs_initcall(init_mnt_writers);
198
199static void unlock_mnt_writers(void)
200{
201 int cpu;
202 struct mnt_writer *cpu_writer;
203
204 for_each_possible_cpu(cpu) {
205 cpu_writer = &per_cpu(mnt_writers, cpu);
206 spin_unlock(&cpu_writer->lock);
207 }
208}
209
210static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
211{
212 if (!cpu_writer->mnt)
213 return;
214
215
216
217
218 if (!cpu_writer->count)
219 return;
220 atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
221 cpu_writer->count = 0;
222}
223
224
225
226static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
227 struct vfsmount *mnt)
228{
229 if (cpu_writer->mnt == mnt)
230 return;
231 __clear_mnt_count(cpu_writer);
232 cpu_writer->mnt = mnt;
233}
234
/**
 * mnt_want_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is
 * about to be performed to it, and makes sure that
 * writes are allowed before returning success.  When
 * the write operation is finished, mnt_drop_write()
 * must be called.  This is effectively a refcount.
 */
253int mnt_want_write(struct vfsmount *mnt)
254{
255 int ret = 0;
256 struct mnt_writer *cpu_writer;
257
258 cpu_writer = &get_cpu_var(mnt_writers);
259 spin_lock(&cpu_writer->lock);
260 if (__mnt_is_readonly(mnt)) {
261 ret = -EROFS;
262 goto out;
263 }
264 use_cpu_writer_for_mount(cpu_writer, mnt);
265 cpu_writer->count++;
266out:
267 spin_unlock(&cpu_writer->lock);
268 put_cpu_var(mnt_writers);
269 return ret;
270}
271EXPORT_SYMBOL_GPL(mnt_want_write);
272
273static void lock_mnt_writers(void)
274{
275 int cpu;
276 struct mnt_writer *cpu_writer;
277
278 for_each_possible_cpu(cpu) {
279 cpu_writer = &per_cpu(mnt_writers, cpu);
280 spin_lock(&cpu_writer->lock);
281 __clear_mnt_count(cpu_writer);
282 cpu_writer->mnt = NULL;
283 }
284}
285
/*
 * These per-cpu write counts are not guaranteed to have
 * matched increments and decrements on any given cpu.
 * A file open()ed for write on one cpu and close()d on
 * another cpu will imbalance this count.  Make sure it
 * eventually gets back to a sane state.
 */
293static void handle_write_count_underflow(struct vfsmount *mnt)
294{
295 if (atomic_read(&mnt->__mnt_writers) >=
296 MNT_WRITER_UNDERFLOW_LIMIT)
297 return;
	/*
	 * It isn't necessary to hold all of the locks
	 * at the same time, but doing it this way makes
	 * us share a lot more code.
	 */
	lock_mnt_writers();
	/*
	 * vfsmount_lock is for mnt_flags.
	 */
	spin_lock(&vfsmount_lock);
	/*
	 * If coalescing the per-cpu writer counts did not
	 * get us back to a positive writer count, we have
	 * a bug.
	 */
313 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
314 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
315 WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
316 "count: %d\n",
317 mnt, atomic_read(&mnt->__mnt_writers));
318
319 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
320 }
321 spin_unlock(&vfsmount_lock);
322 unlock_mnt_writers();
323}
324
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * mnt_want_write() call above.
 */
333void mnt_drop_write(struct vfsmount *mnt)
334{
335 int must_check_underflow = 0;
336 struct mnt_writer *cpu_writer;
337
338 cpu_writer = &get_cpu_var(mnt_writers);
339 spin_lock(&cpu_writer->lock);
340
341 use_cpu_writer_for_mount(cpu_writer, mnt);
342 if (cpu_writer->count > 0) {
343 cpu_writer->count--;
344 } else {
345 must_check_underflow = 1;
346 atomic_dec(&mnt->__mnt_writers);
347 }
348
349 spin_unlock(&cpu_writer->lock);

	/*
	 * We only touch the (cold) global __mnt_writers counter when
	 * this cpu had no locally cached writes to give back, so only
	 * then is an underflow check worthwhile.
	 */
	if (must_check_underflow)
		handle_write_count_underflow(mnt);
	/*
	 * This could be done right after the spinlock
	 * is taken because the spinlock keeps us on
	 * the cpu, and disables preemption.  However,
	 * putting it here bounds the amount that
	 * __mnt_writers can underflow.  Without it,
	 * we could theoretically wrap __mnt_writers.
	 */
365 put_cpu_var(mnt_writers);
366}
367EXPORT_SYMBOL_GPL(mnt_drop_write);
368
369static int mnt_make_readonly(struct vfsmount *mnt)
370{
371 int ret = 0;
372
373 lock_mnt_writers();
374
375
376
377 if (atomic_read(&mnt->__mnt_writers) > 0) {
378 ret = -EBUSY;
379 goto out;
380 }
381
382
383
384
385 spin_lock(&vfsmount_lock);
386 if (!ret)
387 mnt->mnt_flags |= MNT_READONLY;
388 spin_unlock(&vfsmount_lock);
389out:
390 unlock_mnt_writers();
391 return ret;
392}
393
394static void __mnt_unmake_readonly(struct vfsmount *mnt)
395{
396 spin_lock(&vfsmount_lock);
397 mnt->mnt_flags &= ~MNT_READONLY;
398 spin_unlock(&vfsmount_lock);
399}
400
401void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
402{
403 mnt->mnt_sb = sb;
404 mnt->mnt_root = dget(sb->s_root);
405}
406
407EXPORT_SYMBOL(simple_set_mnt);
408
409void free_vfsmnt(struct vfsmount *mnt)
410{
411 kfree(mnt->mnt_devname);
412 mnt_free_id(mnt);
413 kmem_cache_free(mnt_cache, mnt);
414}
415
/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir.  If @dir is set return the first mount else return the last mount.
 */
420struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
421 int dir)
422{
423 struct list_head *head = mount_hashtable + hash(mnt, dentry);
424 struct list_head *tmp = head;
425 struct vfsmount *p, *found = NULL;
426
427 for (;;) {
428 tmp = dir ? tmp->next : tmp->prev;
429 p = NULL;
430 if (tmp == head)
431 break;
432 p = list_entry(tmp, struct vfsmount, mnt_hash);
433 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
434 found = p;
435 break;
436 }
437 }
438 return found;
439}
440
/*
 * lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
445struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
446{
447 struct vfsmount *child_mnt;
448 spin_lock(&vfsmount_lock);
449 if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
450 mntget(child_mnt);
451 spin_unlock(&vfsmount_lock);
452 return child_mnt;
453}
454
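/*
 * Check that the mount belongs to the caller's mount namespace.
 */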
455static inline int check_mnt(struct vfsmount *mnt)
456{
457 return mnt->mnt_ns == current->nsproxy->mnt_ns;
458}
459
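/*
 * Bump the namespace's event counter and wake up anybody poll()ing
 * /proc/mounts for changes.
 */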
460static void touch_mnt_namespace(struct mnt_namespace *ns)
461{
462 if (ns) {
463 ns->event = ++event;
464 wake_up_interruptible(&ns->poll);
465 }
466}
467
468static void __touch_mnt_namespace(struct mnt_namespace *ns)
469{
470 if (ns && ns->event != event) {
471 ns->event = event;
472 wake_up_interruptible(&ns->poll);
473 }
474}
475
476static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
477{
478 old_path->dentry = mnt->mnt_mountpoint;
479 old_path->mnt = mnt->mnt_parent;
480 mnt->mnt_parent = mnt;
481 mnt->mnt_mountpoint = mnt->mnt_root;
482 list_del_init(&mnt->mnt_child);
483 list_del_init(&mnt->mnt_hash);
484 old_path->dentry->d_mounted--;
485}
486
487void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
488 struct vfsmount *child_mnt)
489{
490 child_mnt->mnt_parent = mntget(mnt);
491 child_mnt->mnt_mountpoint = dget(dentry);
492 dentry->d_mounted++;
493}
494
495static void attach_mnt(struct vfsmount *mnt, struct path *path)
496{
497 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
498 list_add_tail(&mnt->mnt_hash, mount_hashtable +
499 hash(path->mnt, path->dentry));
500 list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
501}
502
/*
 * The caller must hold vfsmount_lock.
 */
506static void commit_tree(struct vfsmount *mnt)
507{
508 struct vfsmount *parent = mnt->mnt_parent;
509 struct vfsmount *m;
510 LIST_HEAD(head);
511 struct mnt_namespace *n = parent->mnt_ns;
512
513 BUG_ON(parent == mnt);
514
515 list_add_tail(&head, &mnt->mnt_list);
516 list_for_each_entry(m, &head, mnt_list)
517 m->mnt_ns = n;
518 list_splice(&head, n->list.prev);
519
520 list_add_tail(&mnt->mnt_hash, mount_hashtable +
521 hash(parent, mnt->mnt_mountpoint));
522 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
523 touch_mnt_namespace(n);
524}
525
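/*
 * Depth-first traversal of the mount tree rooted at @root: return the
 * next mount after @p, or NULL once the whole tree has been visited.
 */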
526static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
527{
528 struct list_head *next = p->mnt_mounts.next;
529 if (next == &p->mnt_mounts) {
530 while (1) {
531 if (p == root)
532 return NULL;
533 next = p->mnt_child.next;
534 if (next != &p->mnt_parent->mnt_mounts)
535 break;
536 p = p->mnt_parent;
537 }
538 }
539 return list_entry(next, struct vfsmount, mnt_child);
540}
541
542static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
543{
544 struct list_head *prev = p->mnt_mounts.prev;
545 while (prev != &p->mnt_mounts) {
546 p = list_entry(prev, struct vfsmount, mnt_child);
547 prev = p->mnt_mounts.prev;
548 }
549 return p;
550}
551
552static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
553 int flag)
554{
555 struct super_block *sb = old->mnt_sb;
556 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
557
558 if (mnt) {
559 if (flag & (CL_SLAVE | CL_PRIVATE))
560 mnt->mnt_group_id = 0;
561 else
562 mnt->mnt_group_id = old->mnt_group_id;
563
564 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
565 int err = mnt_alloc_group_id(mnt);
566 if (err)
567 goto out_free;
568 }
569
570 mnt->mnt_flags = old->mnt_flags;
571 atomic_inc(&sb->s_active);
572 mnt->mnt_sb = sb;
573 mnt->mnt_root = dget(root);
574 mnt->mnt_mountpoint = mnt->mnt_root;
575 mnt->mnt_parent = mnt;
576
577 if (flag & CL_SLAVE) {
578 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
579 mnt->mnt_master = old;
580 CLEAR_MNT_SHARED(mnt);
581 } else if (!(flag & CL_PRIVATE)) {
582 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
583 list_add(&mnt->mnt_share, &old->mnt_share);
584 if (IS_MNT_SLAVE(old))
585 list_add(&mnt->mnt_slave, &old->mnt_slave);
586 mnt->mnt_master = old->mnt_master;
587 }
588 if (flag & CL_MAKE_SHARED)
589 set_mnt_shared(mnt);

		/* stick the duplicate mount on the same expiry list
		 * as the original if that was on one */
593 if (flag & CL_EXPIRE) {
594 if (!list_empty(&old->mnt_expire))
595 list_add(&mnt->mnt_expire, &old->mnt_expire);
596 }
597 }
598 return mnt;
599
600 out_free:
601 free_vfsmnt(mnt);
602 return NULL;
603}
604
605static inline void __mntput(struct vfsmount *mnt)
606{
607 int cpu;
608 struct super_block *sb = mnt->mnt_sb;
609
	/*
	 * We don't have to hold all of the locks at the
	 * same time here because we know that we're the
	 * last reference to mnt and that no new writers
	 * can come in.
	 */
615 for_each_possible_cpu(cpu) {
616 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
617 spin_lock(&cpu_writer->lock);
618 if (cpu_writer->mnt != mnt) {
619 spin_unlock(&cpu_writer->lock);
620 continue;
621 }
622 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
623 cpu_writer->count = 0;
624
625
626
627
628
629 cpu_writer->mnt = NULL;
630 spin_unlock(&cpu_writer->lock);
631 }
632
	/*
	 * A non-zero count here probably means somebody messed up a
	 * mnt_want_write()/mnt_drop_write() pair; if so, the filesystem
	 * was probably unable to make r/w -> r/o transitions.
	 */
638 WARN_ON(atomic_read(&mnt->__mnt_writers));
639 dput(mnt->mnt_root);
640 free_vfsmnt(mnt);
641 deactivate_super(sb);
642}
643
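/*
 * Drop a reference without clearing the expiry mark.  If this was the
 * last reference, release any pins (acct, security) and try again
 * before finally tearing the mount down in __mntput().
 */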
644void mntput_no_expire(struct vfsmount *mnt)
645{
646repeat:
647 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
648 if (likely(!mnt->mnt_pinned)) {
649 spin_unlock(&vfsmount_lock);
650 __mntput(mnt);
651 return;
652 }
653 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
654 mnt->mnt_pinned = 0;
655 spin_unlock(&vfsmount_lock);
656 acct_auto_close_mnt(mnt);
657 security_sb_umount_close(mnt);
658 goto repeat;
659 }
660}
661
662EXPORT_SYMBOL(mntput_no_expire);
663
664void mnt_pin(struct vfsmount *mnt)
665{
666 spin_lock(&vfsmount_lock);
667 mnt->mnt_pinned++;
668 spin_unlock(&vfsmount_lock);
669}
670
671EXPORT_SYMBOL(mnt_pin);
672
673void mnt_unpin(struct vfsmount *mnt)
674{
675 spin_lock(&vfsmount_lock);
676 if (mnt->mnt_pinned) {
677 atomic_inc(&mnt->mnt_count);
678 mnt->mnt_pinned--;
679 }
680 spin_unlock(&vfsmount_lock);
681}
682
683EXPORT_SYMBOL(mnt_unpin);
684
685static inline void mangle(struct seq_file *m, const char *s)
686{
687 seq_escape(m, s, " \t\n\\");
688}
689
/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
696int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
697{
698 const char *options;
699
700 rcu_read_lock();
701 options = rcu_dereference(mnt->mnt_sb->s_options);
702
703 if (options != NULL && options[0]) {
704 seq_putc(m, ',');
705 mangle(m, options);
706 }
707 rcu_read_unlock();
708
709 return 0;
710}
711EXPORT_SYMBOL(generic_show_options);
712
/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure, that previous options are not overwritten if the
 * remount fails.
 *
 * Also note, that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect reality
 * any more.
 */
726void save_mount_options(struct super_block *sb, char *options)
727{
728 BUG_ON(sb->s_options);
729 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
730}
731EXPORT_SYMBOL(save_mount_options);
732
733void replace_mount_options(struct super_block *sb, char *options)
734{
735 char *old = sb->s_options;
736 rcu_assign_pointer(sb->s_options, options);
737 if (old) {
738 synchronize_rcu();
739 kfree(old);
740 }
741}
742EXPORT_SYMBOL(replace_mount_options);
743
744#ifdef CONFIG_PROC_FS
745
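/*
 * seq_file iterator for /proc/*/mounts and friends: walk the mount list
 * of the namespace captured in proc_mounts, under namespace_sem.
 */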
746static void *m_start(struct seq_file *m, loff_t *pos)
747{
748 struct proc_mounts *p = m->private;
749
750 down_read(&namespace_sem);
751 return seq_list_start(&p->ns->list, *pos);
752}
753
754static void *m_next(struct seq_file *m, void *v, loff_t *pos)
755{
756 struct proc_mounts *p = m->private;
757
758 return seq_list_next(v, &p->ns->list, pos);
759}
760
761static void m_stop(struct seq_file *m, void *v)
762{
763 up_read(&namespace_sem);
764}
765
766struct proc_fs_info {
767 int flag;
768 const char *str;
769};
770
771static int show_sb_opts(struct seq_file *m, struct super_block *sb)
772{
773 static const struct proc_fs_info fs_info[] = {
774 { MS_SYNCHRONOUS, ",sync" },
775 { MS_DIRSYNC, ",dirsync" },
776 { MS_MANDLOCK, ",mand" },
777 { 0, NULL }
778 };
779 const struct proc_fs_info *fs_infop;
780
781 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
782 if (sb->s_flags & fs_infop->flag)
783 seq_puts(m, fs_infop->str);
784 }
785
786 return security_sb_show_options(m, sb);
787}
788
789static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
790{
791 static const struct proc_fs_info mnt_info[] = {
792 { MNT_NOSUID, ",nosuid" },
793 { MNT_NODEV, ",nodev" },
794 { MNT_NOEXEC, ",noexec" },
795 { MNT_NOATIME, ",noatime" },
796 { MNT_NODIRATIME, ",nodiratime" },
797 { MNT_RELATIME, ",relatime" },
798 { MNT_STRICTATIME, ",strictatime" },
799 { 0, NULL }
800 };
801 const struct proc_fs_info *fs_infop;
802
803 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
804 if (mnt->mnt_flags & fs_infop->flag)
805 seq_puts(m, fs_infop->str);
806 }
807}
808
809static void show_type(struct seq_file *m, struct super_block *sb)
810{
811 mangle(m, sb->s_type->name);
812 if (sb->s_subtype && sb->s_subtype[0]) {
813 seq_putc(m, '.');
814 mangle(m, sb->s_subtype);
815 }
816}
817
818static int show_vfsmnt(struct seq_file *m, void *v)
819{
820 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
821 int err = 0;
822 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
823
824 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
825 seq_putc(m, ' ');
826 seq_path(m, &mnt_path, " \t\n\\");
827 seq_putc(m, ' ');
828 show_type(m, mnt->mnt_sb);
829 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
830 err = show_sb_opts(m, mnt->mnt_sb);
831 if (err)
832 goto out;
833 show_mnt_opts(m, mnt);
834 if (mnt->mnt_sb->s_op->show_options)
835 err = mnt->mnt_sb->s_op->show_options(m, mnt);
836 seq_puts(m, " 0 0\n");
837out:
838 return err;
839}
840
841const struct seq_operations mounts_op = {
842 .start = m_start,
843 .next = m_next,
844 .stop = m_stop,
845 .show = show_vfsmnt
846};
847
848static int show_mountinfo(struct seq_file *m, void *v)
849{
850 struct proc_mounts *p = m->private;
851 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
852 struct super_block *sb = mnt->mnt_sb;
853 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
854 struct path root = p->root;
855 int err = 0;
856
857 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
858 MAJOR(sb->s_dev), MINOR(sb->s_dev));
859 seq_dentry(m, mnt->mnt_root, " \t\n\\");
860 seq_putc(m, ' ');
861 seq_path_root(m, &mnt_path, &root, " \t\n\\");
862 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
		/*
		 * Mountpoint is outside root, discard that one.  Ugly,
		 * but less so than trying to do that in iterator in a
		 * race-free way (due to renames).
		 */
868 return SEQ_SKIP;
869 }
870 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
871 show_mnt_opts(m, mnt);
872
873
874 if (IS_MNT_SHARED(mnt))
875 seq_printf(m, " shared:%i", mnt->mnt_group_id);
876 if (IS_MNT_SLAVE(mnt)) {
877 int master = mnt->mnt_master->mnt_group_id;
878 int dom = get_dominating_id(mnt, &p->root);
879 seq_printf(m, " master:%i", master);
880 if (dom && dom != master)
881 seq_printf(m, " propagate_from:%i", dom);
882 }
883 if (IS_MNT_UNBINDABLE(mnt))
884 seq_puts(m, " unbindable");
885
886
887 seq_puts(m, " - ");
888 show_type(m, sb);
889 seq_putc(m, ' ');
890 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
891 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
892 err = show_sb_opts(m, sb);
893 if (err)
894 goto out;
895 if (sb->s_op->show_options)
896 err = sb->s_op->show_options(m, mnt);
897 seq_putc(m, '\n');
898out:
899 return err;
900}
901
902const struct seq_operations mountinfo_op = {
903 .start = m_start,
904 .next = m_next,
905 .stop = m_stop,
906 .show = show_mountinfo,
907};
908
909static int show_vfsstat(struct seq_file *m, void *v)
910{
911 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
912 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
913 int err = 0;
914
915
916 if (mnt->mnt_devname) {
917 seq_puts(m, "device ");
918 mangle(m, mnt->mnt_devname);
919 } else
920 seq_puts(m, "no device");
921
922
923 seq_puts(m, " mounted on ");
924 seq_path(m, &mnt_path, " \t\n\\");
925 seq_putc(m, ' ');
926
927
928 seq_puts(m, "with fstype ");
929 show_type(m, mnt->mnt_sb);
930
931
932 if (mnt->mnt_sb->s_op->show_stats) {
933 seq_putc(m, ' ');
934 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
935 }
936
937 seq_putc(m, '\n');
938 return err;
939}
940
941const struct seq_operations mountstats_op = {
942 .start = m_start,
943 .next = m_next,
944 .stop = m_stop,
945 .show = show_vfsstat,
946};
947#endif
948
/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
957int may_umount_tree(struct vfsmount *mnt)
958{
959 int actual_refs = 0;
960 int minimum_refs = 0;
961 struct vfsmount *p;
962
963 spin_lock(&vfsmount_lock);
964 for (p = mnt; p; p = next_mnt(p, mnt)) {
965 actual_refs += atomic_read(&p->mnt_count);
966 minimum_refs += 2;
967 }
968 spin_unlock(&vfsmount_lock);
969
970 if (actual_refs > minimum_refs)
971 return 0;
972
973 return 1;
974}
975
976EXPORT_SYMBOL(may_umount_tree);
977
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts.  If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account.  IOW, in some cases it will
 * give false negatives.  The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
991int may_umount(struct vfsmount *mnt)
992{
993 int ret = 1;
994 spin_lock(&vfsmount_lock);
995 if (propagate_mount_busy(mnt, 2))
996 ret = 0;
997 spin_unlock(&vfsmount_lock);
998 return ret;
999}
1000
1001EXPORT_SYMBOL(may_umount);
1002
1003void release_mounts(struct list_head *head)
1004{
1005 struct vfsmount *mnt;
1006 while (!list_empty(head)) {
1007 mnt = list_first_entry(head, struct vfsmount, mnt_hash);
1008 list_del_init(&mnt->mnt_hash);
1009 if (mnt->mnt_parent != mnt) {
1010 struct dentry *dentry;
1011 struct vfsmount *m;
1012 spin_lock(&vfsmount_lock);
1013 dentry = mnt->mnt_mountpoint;
1014 m = mnt->mnt_parent;
1015 mnt->mnt_mountpoint = mnt->mnt_root;
1016 mnt->mnt_parent = mnt;
1017 m->mnt_ghosts--;
1018 spin_unlock(&vfsmount_lock);
1019 dput(dentry);
1020 mntput(m);
1021 }
1022 mntput(mnt);
1023 }
1024}
1025
1026void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1027{
1028 struct vfsmount *p;
1029
1030 for (p = mnt; p; p = next_mnt(p, mnt))
1031 list_move(&p->mnt_hash, kill);
1032
1033 if (propagate)
1034 propagate_umount(kill);
1035
1036 list_for_each_entry(p, kill, mnt_hash) {
1037 list_del_init(&p->mnt_expire);
1038 list_del_init(&p->mnt_list);
1039 __touch_mnt_namespace(p->mnt_ns);
1040 p->mnt_ns = NULL;
1041 list_del_init(&p->mnt_child);
1042 if (p->mnt_parent != p) {
1043 p->mnt_parent->mnt_ghosts++;
1044 p->mnt_mountpoint->d_mounted--;
1045 }
1046 change_mnt_propagation(p, MS_PRIVATE);
1047 }
1048}
1049
1050static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
1051
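/*
 * Unmount @mnt according to @flags (MNT_FORCE, MNT_DETACH, MNT_EXPIRE).
 */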
1052static int do_umount(struct vfsmount *mnt, int flags)
1053{
1054 struct super_block *sb = mnt->mnt_sb;
1055 int retval;
1056 LIST_HEAD(umount_list);
1057
1058 retval = security_sb_umount(mnt, flags);
1059 if (retval)
1060 return retval;
1061
	/*
	 * Allow userspace to request a mountpoint be expired rather
	 * than unmounting unconditionally.  Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
1068 if (flags & MNT_EXPIRE) {
1069 if (mnt == current->fs->root.mnt ||
1070 flags & (MNT_FORCE | MNT_DETACH))
1071 return -EINVAL;
1072
1073 if (atomic_read(&mnt->mnt_count) != 2)
1074 return -EBUSY;
1075
1076 if (!xchg(&mnt->mnt_expiry_mark, 1))
1077 return -EAGAIN;
1078 }
1079
	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things.  In the Unix tradition of
	 * 'Gee thats tricky, lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like.  Thats for the mount program to worry
	 * about for the moment.
	 */
1090 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1091 sb->s_op->umount_begin(sb);
1092 }
1093
	/*
	 * We no longer attempt to tear down the whole namespace when the
	 * caller asks to unmount its own root; instead we fall back to
	 * remounting the root filesystem read-only below.
	 */
	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
1108 down_write(&sb->s_umount);
1109 if (!(sb->s_flags & MS_RDONLY)) {
1110 lock_kernel();
1111 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1112 unlock_kernel();
1113 }
1114 up_write(&sb->s_umount);
1115 return retval;
1116 }
1117
1118 down_write(&namespace_sem);
1119 spin_lock(&vfsmount_lock);
1120 event++;
1121
1122 if (!(flags & MNT_DETACH))
1123 shrink_submounts(mnt, &umount_list);
1124
1125 retval = -EBUSY;
1126 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1127 if (!list_empty(&mnt->mnt_list))
1128 umount_tree(mnt, 1, &umount_list);
1129 retval = 0;
1130 }
1131 spin_unlock(&vfsmount_lock);
1132 if (retval)
1133 security_sb_umount_busy(mnt);
1134 up_write(&namespace_sem);
1135 release_mounts(&umount_list);
1136 return retval;
1137}
1138
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes.  Our API is identical to OSF/1 to avoid making a mess of AMD
 */
1147SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1148{
1149 struct path path;
1150 int retval;
1151
1152 retval = user_path(name, &path);
1153 if (retval)
1154 goto out;
1155 retval = -EINVAL;
1156 if (path.dentry != path.mnt->mnt_root)
1157 goto dput_and_out;
1158 if (!check_mnt(path.mnt))
1159 goto dput_and_out;
1160
1161 retval = -EPERM;
1162 if (!capable(CAP_SYS_ADMIN))
1163 goto dput_and_out;
1164
1165 retval = do_umount(path.mnt, flags);
1166dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
1168 dput(path.dentry);
1169 mntput_no_expire(path.mnt);
1170out:
1171 return retval;
1172}
1173
1174#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1175
1176
1177
1178
1179SYSCALL_DEFINE1(oldumount, char __user *, name)
1180{
1181 return sys_umount(name, 0);
1182}
1183
1184#endif
1185
1186static int mount_is_safe(struct path *path)
1187{
1188 if (capable(CAP_SYS_ADMIN))
1189 return 0;
1190 return -EPERM;
1191#ifdef notyet
1192 if (S_ISLNK(path->dentry->d_inode->i_mode))
1193 return -EPERM;
1194 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1195 if (current_uid() != path->dentry->d_inode->i_uid)
1196 return -EPERM;
1197 }
1198 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1199 return -EPERM;
1200 return 0;
1201#endif
1202}
1203
1204struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1205 int flag)
1206{
1207 struct vfsmount *res, *p, *q, *r, *s;
1208 struct path path;
1209
1210 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1211 return NULL;
1212
1213 res = q = clone_mnt(mnt, dentry, flag);
1214 if (!q)
1215 goto Enomem;
1216 q->mnt_mountpoint = mnt->mnt_mountpoint;
1217
1218 p = mnt;
1219 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1220 if (!is_subdir(r->mnt_mountpoint, dentry))
1221 continue;
1222
1223 for (s = r; s; s = next_mnt(s, r)) {
1224 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
1225 s = skip_mnt_tree(s);
1226 continue;
1227 }
1228 while (p != s->mnt_parent) {
1229 p = p->mnt_parent;
1230 q = q->mnt_parent;
1231 }
1232 p = s;
1233 path.mnt = q;
1234 path.dentry = p->mnt_mountpoint;
1235 q = clone_mnt(p, p->mnt_root, flag);
1236 if (!q)
1237 goto Enomem;
1238 spin_lock(&vfsmount_lock);
1239 list_add_tail(&q->mnt_list, &res->mnt_list);
1240 attach_mnt(q, &path);
1241 spin_unlock(&vfsmount_lock);
1242 }
1243 }
1244 return res;
1245Enomem:
1246 if (res) {
1247 LIST_HEAD(umount_list);
1248 spin_lock(&vfsmount_lock);
1249 umount_tree(res, 0, &umount_list);
1250 spin_unlock(&vfsmount_lock);
1251 release_mounts(&umount_list);
1252 }
1253 return NULL;
1254}
1255
1256struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
1257{
1258 struct vfsmount *tree;
1259 down_write(&namespace_sem);
1260 tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
1261 up_write(&namespace_sem);
1262 return tree;
1263}
1264
1265void drop_collected_mounts(struct vfsmount *mnt)
1266{
1267 LIST_HEAD(umount_list);
1268 down_write(&namespace_sem);
1269 spin_lock(&vfsmount_lock);
1270 umount_tree(mnt, 0, &umount_list);
1271 spin_unlock(&vfsmount_lock);
1272 up_write(&namespace_sem);
1273 release_mounts(&umount_list);
1274}
1275
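/*
 * Give up the peer group IDs handed out by a failed invent_group_ids(),
 * stopping at @end (exclusive).
 */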
1276static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1277{
1278 struct vfsmount *p;
1279
1280 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1281 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1282 mnt_release_group_id(p);
1283 }
1284}
1285
1286static int invent_group_ids(struct vfsmount *mnt, bool recurse)
1287{
1288 struct vfsmount *p;
1289
1290 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1291 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1292 int err = mnt_alloc_group_id(p);
1293 if (err) {
1294 cleanup_group_ids(mnt, p);
1295 return err;
1296 }
1297 }
1298 }
1299
1300 return 0;
1301}
1302
/*
 *  @source_mnt : mount tree to be attached
 *  @path       : place the mount tree @source_mnt is attached
 *  @parent_path: if non-null, detach the source_mnt from its parent and
 *		   store the parent mount and mountpoint dentry
 *		   (done when source_mnt is moved)
 *
 * If the destination mount is part of a shared peer group, the attached
 * tree is propagated to every mount in that peer group (and their slaves),
 * and all of the new copies are committed into their namespaces here.
 * See Documentation/filesystems/sharedsubtree.txt for the full semantics
 * of attaching shared, slave, private and unbindable mounts.
 */
1366static int attach_recursive_mnt(struct vfsmount *source_mnt,
1367 struct path *path, struct path *parent_path)
1368{
1369 LIST_HEAD(tree_list);
1370 struct vfsmount *dest_mnt = path->mnt;
1371 struct dentry *dest_dentry = path->dentry;
1372 struct vfsmount *child, *p;
1373 int err;
1374
1375 if (IS_MNT_SHARED(dest_mnt)) {
1376 err = invent_group_ids(source_mnt, true);
1377 if (err)
1378 goto out;
1379 }
1380 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
1381 if (err)
1382 goto out_cleanup_ids;
1383
1384 if (IS_MNT_SHARED(dest_mnt)) {
1385 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1386 set_mnt_shared(p);
1387 }
1388
1389 spin_lock(&vfsmount_lock);
1390 if (parent_path) {
1391 detach_mnt(source_mnt, parent_path);
1392 attach_mnt(source_mnt, path);
1393 touch_mnt_namespace(parent_path->mnt->mnt_ns);
1394 } else {
1395 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1396 commit_tree(source_mnt);
1397 }
1398
1399 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
1400 list_del_init(&child->mnt_hash);
1401 commit_tree(child);
1402 }
1403 spin_unlock(&vfsmount_lock);
1404 return 0;
1405
1406 out_cleanup_ids:
1407 if (IS_MNT_SHARED(dest_mnt))
1408 cleanup_group_ids(source_mnt, NULL);
1409 out:
1410 return err;
1411}
1412
1413static int graft_tree(struct vfsmount *mnt, struct path *path)
1414{
1415 int err;
1416 if (mnt->mnt_sb->s_flags & MS_NOUSER)
1417 return -EINVAL;
1418
1419 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1420 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
1421 return -ENOTDIR;
1422
1423 err = -ENOENT;
1424 mutex_lock(&path->dentry->d_inode->i_mutex);
1425 if (IS_DEADDIR(path->dentry->d_inode))
1426 goto out_unlock;
1427
1428 err = security_sb_check_sb(mnt, path);
1429 if (err)
1430 goto out_unlock;
1431
1432 err = -ENOENT;
1433 if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
1434 err = attach_recursive_mnt(mnt, path, NULL);
1435out_unlock:
1436 mutex_unlock(&path->dentry->d_inode->i_mutex);
1437 if (!err)
1438 security_sb_post_addmount(mnt, path);
1439 return err;
1440}
1441
/*
 * recursively change the type of the mountpoint.
 */
1445static int do_change_type(struct path *path, int flag)
1446{
1447 struct vfsmount *m, *mnt = path->mnt;
1448 int recurse = flag & MS_REC;
1449 int type = flag & ~MS_REC;
1450 int err = 0;
1451
1452 if (!capable(CAP_SYS_ADMIN))
1453 return -EPERM;
1454
1455 if (path->dentry != path->mnt->mnt_root)
1456 return -EINVAL;
1457
1458 down_write(&namespace_sem);
1459 if (type == MS_SHARED) {
1460 err = invent_group_ids(mnt, recurse);
1461 if (err)
1462 goto out_unlock;
1463 }
1464
1465 spin_lock(&vfsmount_lock);
1466 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1467 change_mnt_propagation(m, type);
1468 spin_unlock(&vfsmount_lock);
1469
1470 out_unlock:
1471 up_write(&namespace_sem);
1472 return err;
1473}
1474
/*
 * Do a loopback (bind) mount.
 */
1478static int do_loopback(struct path *path, char *old_name,
1479 int recurse)
1480{
1481 struct path old_path;
1482 struct vfsmount *mnt = NULL;
1483 int err = mount_is_safe(path);
1484 if (err)
1485 return err;
1486 if (!old_name || !*old_name)
1487 return -EINVAL;
1488 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1489 if (err)
1490 return err;
1491
1492 down_write(&namespace_sem);
1493 err = -EINVAL;
1494 if (IS_MNT_UNBINDABLE(old_path.mnt))
1495 goto out;
1496
1497 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1498 goto out;
1499
1500 err = -ENOMEM;
1501 if (recurse)
1502 mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
1503 else
1504 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
1505
1506 if (!mnt)
1507 goto out;
1508
1509 err = graft_tree(mnt, path);
1510 if (err) {
1511 LIST_HEAD(umount_list);
1512 spin_lock(&vfsmount_lock);
1513 umount_tree(mnt, 0, &umount_list);
1514 spin_unlock(&vfsmount_lock);
1515 release_mounts(&umount_list);
1516 }
1517
1518out:
1519 up_write(&namespace_sem);
1520 path_put(&old_path);
1521 return err;
1522}
1523
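/*
 * Handle MS_REMOUNT|MS_BIND: only the read-only flag can be changed
 * without touching the superblock.
 */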
1524static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1525{
1526 int error = 0;
1527 int readonly_request = 0;
1528
1529 if (ms_flags & MS_RDONLY)
1530 readonly_request = 1;
1531 if (readonly_request == __mnt_is_readonly(mnt))
1532 return 0;
1533
1534 if (readonly_request)
1535 error = mnt_make_readonly(mnt);
1536 else
1537 __mnt_unmake_readonly(mnt);
1538 return error;
1539}
1540
/*
 * change filesystem flags.  dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tell us about it.
 */
1546static int do_remount(struct path *path, int flags, int mnt_flags,
1547 void *data)
1548{
1549 int err;
1550 struct super_block *sb = path->mnt->mnt_sb;
1551
1552 if (!capable(CAP_SYS_ADMIN))
1553 return -EPERM;
1554
1555 if (!check_mnt(path->mnt))
1556 return -EINVAL;
1557
1558 if (path->dentry != path->mnt->mnt_root)
1559 return -EINVAL;
1560
1561 down_write(&sb->s_umount);
1562 if (flags & MS_BIND)
1563 err = change_mount_flags(path->mnt, flags);
1564 else
1565 err = do_remount_sb(sb, flags, data, 0);
1566 if (!err)
1567 path->mnt->mnt_flags = mnt_flags;
1568 up_write(&sb->s_umount);
1569 if (!err) {
1570 security_sb_post_remount(path->mnt, flags, data);
1571
1572 spin_lock(&vfsmount_lock);
1573 touch_mnt_namespace(path->mnt->mnt_ns);
1574 spin_unlock(&vfsmount_lock);
1575 }
1576 return err;
1577}
1578
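/*
 * Return 1 if any mount in the tree rooted at @mnt is marked unbindable.
 */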
1579static inline int tree_contains_unbindable(struct vfsmount *mnt)
1580{
1581 struct vfsmount *p;
1582 for (p = mnt; p; p = next_mnt(p, mnt)) {
1583 if (IS_MNT_UNBINDABLE(p))
1584 return 1;
1585 }
1586 return 0;
1587}
1588
1589static int do_move_mount(struct path *path, char *old_name)
1590{
1591 struct path old_path, parent_path;
1592 struct vfsmount *p;
1593 int err = 0;
1594 if (!capable(CAP_SYS_ADMIN))
1595 return -EPERM;
1596 if (!old_name || !*old_name)
1597 return -EINVAL;
1598 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1599 if (err)
1600 return err;
1601
1602 down_write(&namespace_sem);
1603 while (d_mountpoint(path->dentry) &&
1604 follow_down(&path->mnt, &path->dentry))
1605 ;
1606 err = -EINVAL;
1607 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1608 goto out;
1609
1610 err = -ENOENT;
1611 mutex_lock(&path->dentry->d_inode->i_mutex);
1612 if (IS_DEADDIR(path->dentry->d_inode))
1613 goto out1;
1614
1615 if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
1616 goto out1;
1617
1618 err = -EINVAL;
1619 if (old_path.dentry != old_path.mnt->mnt_root)
1620 goto out1;
1621
1622 if (old_path.mnt == old_path.mnt->mnt_parent)
1623 goto out1;
1624
1625 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1626 S_ISDIR(old_path.dentry->d_inode->i_mode))
1627 goto out1;
1628
	/*
	 * Don't move a mount residing in a shared parent.
	 */
1631 if (old_path.mnt->mnt_parent &&
1632 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1633 goto out1;
1634
	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */
1638 if (IS_MNT_SHARED(path->mnt) &&
1639 tree_contains_unbindable(old_path.mnt))
1640 goto out1;
1641 err = -ELOOP;
1642 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
1643 if (p == old_path.mnt)
1644 goto out1;
1645
1646 err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
1647 if (err)
1648 goto out1;
1649
	/* if the mount is moved, it should no longer be expired
	 * automatically */
1652 list_del_init(&old_path.mnt->mnt_expire);
1653out1:
1654 mutex_unlock(&path->dentry->d_inode->i_mutex);
1655out:
1656 up_write(&namespace_sem);
1657 if (!err)
1658 path_put(&parent_path);
1659 path_put(&old_path);
1660 return err;
1661}
1662
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
1667static int do_new_mount(struct path *path, char *type, int flags,
1668 int mnt_flags, char *name, void *data)
1669{
1670 struct vfsmount *mnt;
1671
1672 if (!type || !memchr(type, 0, PAGE_SIZE))
1673 return -EINVAL;
1674
1675
1676 if (!capable(CAP_SYS_ADMIN))
1677 return -EPERM;
1678
1679 mnt = do_kern_mount(type, flags, name, data);
1680 if (IS_ERR(mnt))
1681 return PTR_ERR(mnt);
1682
1683 return do_add_mount(mnt, path, mnt_flags, NULL);
1684}
1685
/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
1690int do_add_mount(struct vfsmount *newmnt, struct path *path,
1691 int mnt_flags, struct list_head *fslist)
1692{
1693 int err;
1694
1695 down_write(&namespace_sem);
1696
1697 while (d_mountpoint(path->dentry) &&
1698 follow_down(&path->mnt, &path->dentry))
1699 ;
1700 err = -EINVAL;
1701 if (!check_mnt(path->mnt))
1702 goto unlock;

	/* Refuse the same filesystem on the same mount point */
1705 err = -EBUSY;
1706 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1707 path->mnt->mnt_root == path->dentry)
1708 goto unlock;
1709
1710 err = -EINVAL;
1711 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
1712 goto unlock;
1713
1714 newmnt->mnt_flags = mnt_flags;
1715 if ((err = graft_tree(newmnt, path)))
1716 goto unlock;
1717
1718 if (fslist)
1719 list_add_tail(&newmnt->mnt_expire, fslist);
1720
1721 up_write(&namespace_sem);
1722 return 0;
1723
1724unlock:
1725 up_write(&namespace_sem);
1726 mntput(newmnt);
1727 return err;
1728}
1729
1730EXPORT_SYMBOL_GPL(do_add_mount);
1731
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
1737void mark_mounts_for_expiry(struct list_head *mounts)
1738{
1739 struct vfsmount *mnt, *next;
1740 LIST_HEAD(graveyard);
1741 LIST_HEAD(umounts);
1742
1743 if (list_empty(mounts))
1744 return;
1745
1746 down_write(&namespace_sem);
1747 spin_lock(&vfsmount_lock);

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
1755 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1756 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
1757 propagate_mount_busy(mnt, 1))
1758 continue;
1759 list_move(&mnt->mnt_expire, &graveyard);
1760 }
1761 while (!list_empty(&graveyard)) {
1762 mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
1763 touch_mnt_namespace(mnt->mnt_ns);
1764 umount_tree(mnt, 1, &umounts);
1765 }
1766 spin_unlock(&vfsmount_lock);
1767 up_write(&namespace_sem);
1768
1769 release_mounts(&umounts);
1770}
1771
1772EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
1773
/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
1780static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
1781{
1782 struct vfsmount *this_parent = parent;
1783 struct list_head *next;
1784 int found = 0;
1785
1786repeat:
1787 next = this_parent->mnt_mounts.next;
1788resume:
1789 while (next != &this_parent->mnt_mounts) {
1790 struct list_head *tmp = next;
1791 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);
1792
1793 next = tmp->next;
1794 if (!(mnt->mnt_flags & MNT_SHRINKABLE))
1795 continue;
1796
1797
1798
1799 if (!list_empty(&mnt->mnt_mounts)) {
1800 this_parent = mnt;
1801 goto repeat;
1802 }
1803
1804 if (!propagate_mount_busy(mnt, 1)) {
1805 list_move_tail(&mnt->mnt_expire, graveyard);
1806 found++;
1807 }
1808 }
1809
1810
1811
1812 if (this_parent != parent) {
1813 next = this_parent->mnt_child.next;
1814 this_parent = this_parent->mnt_parent;
1815 goto resume;
1816 }
1817 return found;
1818}
1819
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 */
1824static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1825{
1826 LIST_HEAD(graveyard);
1827 struct vfsmount *m;
1828
1829
1830 while (select_submounts(mnt, &graveyard)) {
1831 while (!list_empty(&graveyard)) {
1832 m = list_first_entry(&graveyard, struct vfsmount,
1833 mnt_expire);
1834 touch_mnt_namespace(m->mnt_ns);
1835 umount_tree(m, 1, umounts);
1836 }
1837 }
1838}
1839
/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
1846static long exact_copy_from_user(void *to, const void __user * from,
1847 unsigned long n)
1848{
1849 char *t = to;
1850 const char __user *f = from;
1851 char c;
1852
1853 if (!access_ok(VERIFY_READ, from, n))
1854 return n;
1855
1856 while (n) {
1857 if (__get_user(c, f)) {
1858 memset(t, 0, n);
1859 break;
1860 }
1861 *t++ = c;
1862 f++;
1863 n--;
1864 }
1865 return n;
1866}
1867
1868int copy_mount_options(const void __user * data, unsigned long *where)
1869{
1870 int i;
1871 unsigned long page;
1872 unsigned long size;
1873
1874 *where = 0;
1875 if (!data)
1876 return 0;
1877
1878 if (!(page = __get_free_page(GFP_KERNEL)))
1879 return -ENOMEM;

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
1886 size = TASK_SIZE - (unsigned long)data;
1887 if (size > PAGE_SIZE)
1888 size = PAGE_SIZE;
1889
1890 i = size - exact_copy_from_user((void *)page, data, size);
1891 if (!i) {
1892 free_page(page);
1893 return -EFAULT;
1894 }
1895 if (i != PAGE_SIZE)
1896 memset((char *)page + i, 0, PAGE_SIZE - i);
1897 *where = page;
1898 return 0;
1899}
1900
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * data (or be NULL).
 *
 * The MS_MGC_VAL magic that old mount(2) callers pass in the high bits
 * of flags carries no meaning any more and is simply discarded below.
 */
1915long do_mount(char *dev_name, char *dir_name, char *type_page,
1916 unsigned long flags, void *data_page)
1917{
1918 struct path path;
1919 int retval = 0;
1920 int mnt_flags = 0;

	/* Discard magic */
1923 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
1924 flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
1928 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1929 return -EINVAL;
1930 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1931 return -EINVAL;
1932
1933 if (data_page)
1934 ((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* Default to relatime unless overridden */
1937 if (!(flags & MS_NOATIME))
1938 mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
1941 if (flags & MS_NOSUID)
1942 mnt_flags |= MNT_NOSUID;
1943 if (flags & MS_NODEV)
1944 mnt_flags |= MNT_NODEV;
1945 if (flags & MS_NOEXEC)
1946 mnt_flags |= MNT_NOEXEC;
1947 if (flags & MS_NOATIME)
1948 mnt_flags |= MNT_NOATIME;
1949 if (flags & MS_NODIRATIME)
1950 mnt_flags |= MNT_NODIRATIME;
1951 if (flags & MS_STRICTATIME)
1952 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
1953 if (flags & MS_RDONLY)
1954 mnt_flags |= MNT_READONLY;
1955
1956 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1957 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1958 MS_STRICTATIME);

	/* ... and get the mountpoint */
1961 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1962 if (retval)
1963 return retval;
1964
1965 retval = security_sb_mount(dev_name, &path,
1966 type_page, flags, data_page);
1967 if (retval)
1968 goto dput_out;
1969
1970 if (flags & MS_REMOUNT)
1971 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1972 data_page);
1973 else if (flags & MS_BIND)
1974 retval = do_loopback(&path, dev_name, flags & MS_REC);
1975 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1976 retval = do_change_type(&path, flags);
1977 else if (flags & MS_MOVE)
1978 retval = do_move_mount(&path, dev_name);
1979 else
1980 retval = do_new_mount(&path, type_page, flags, mnt_flags,
1981 dev_name, data_page);
1982dput_out:
1983 path_put(&path);
1984 return retval;
1985}

/*
 * Allocate a new namespace structure and populate it with contents
 * copied from the namespace of the passed in task structure.
 */
1991static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1992 struct fs_struct *fs)
1993{
1994 struct mnt_namespace *new_ns;
1995 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
1996 struct vfsmount *p, *q;
1997
1998 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1999 if (!new_ns)
2000 return ERR_PTR(-ENOMEM);
2001
2002 atomic_set(&new_ns->count, 1);
2003 INIT_LIST_HEAD(&new_ns->list);
2004 init_waitqueue_head(&new_ns->poll);
2005 new_ns->event = 0;
2006
2007 down_write(&namespace_sem);
2008
2009 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
2010 CL_COPY_ALL | CL_EXPIRE);
2011 if (!new_ns->root) {
2012 up_write(&namespace_sem);
2013 kfree(new_ns);
2014 return ERR_PTR(-ENOMEM);
2015 }
2016 spin_lock(&vfsmount_lock);
2017 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2018 spin_unlock(&vfsmount_lock);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
2025 p = mnt_ns->root;
2026 q = new_ns->root;
2027 while (p) {
2028 q->mnt_ns = new_ns;
2029 if (fs) {
2030 if (p == fs->root.mnt) {
2031 rootmnt = p;
2032 fs->root.mnt = mntget(q);
2033 }
2034 if (p == fs->pwd.mnt) {
2035 pwdmnt = p;
2036 fs->pwd.mnt = mntget(q);
2037 }
2038 }
2039 p = next_mnt(p, mnt_ns->root);
2040 q = next_mnt(q, new_ns->root);
2041 }
2042 up_write(&namespace_sem);
2043
2044 if (rootmnt)
2045 mntput(rootmnt);
2046 if (pwdmnt)
2047 mntput(pwdmnt);
2048
2049 return new_ns;
2050}
2051
2052struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2053 struct fs_struct *new_fs)
2054{
2055 struct mnt_namespace *new_ns;
2056
2057 BUG_ON(!ns);
2058 get_mnt_ns(ns);
2059
2060 if (!(flags & CLONE_NEWNS))
2061 return ns;
2062
2063 new_ns = dup_mnt_ns(ns, new_fs);
2064
2065 put_mnt_ns(ns);
2066 return new_ns;
2067}
2068
2069SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2070 char __user *, type, unsigned long, flags, void __user *, data)
2071{
2072 int retval;
2073 unsigned long data_page;
2074 unsigned long type_page;
2075 unsigned long dev_page;
2076 char *dir_page;
2077
2078 retval = copy_mount_options(type, &type_page);
2079 if (retval < 0)
2080 return retval;
2081
2082 dir_page = getname(dir_name);
2083 retval = PTR_ERR(dir_page);
2084 if (IS_ERR(dir_page))
2085 goto out1;
2086
2087 retval = copy_mount_options(dev_name, &dev_page);
2088 if (retval < 0)
2089 goto out2;
2090
2091 retval = copy_mount_options(data, &data_page);
2092 if (retval < 0)
2093 goto out3;
2094
2095 lock_kernel();
2096 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
2097 flags, (void *)data_page);
2098 unlock_kernel();
2099 free_page(data_page);
2100
2101out3:
2102 free_page(dev_page);
2103out2:
2104 putname(dir_page);
2105out1:
2106 free_page(type_page);
2107 return retval;
2108}
2109
/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root.  The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root.  No other
 * file system may be mounted on put_old.  After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point.
 *  - note: pivot_root() semantics require filesystems to be mounted on
 *    put_old and new_root.
 */
2135SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2136 const char __user *, put_old)
2137{
2138 struct vfsmount *tmp;
2139 struct path new, old, parent_path, root_parent, root;
2140 int error;
2141
2142 if (!capable(CAP_SYS_ADMIN))
2143 return -EPERM;
2144
2145 error = user_path_dir(new_root, &new);
2146 if (error)
2147 goto out0;
2148 error = -EINVAL;
2149 if (!check_mnt(new.mnt))
2150 goto out1;
2151
2152 error = user_path_dir(put_old, &old);
2153 if (error)
2154 goto out1;
2155
2156 error = security_sb_pivotroot(&old, &new);
2157 if (error) {
2158 path_put(&old);
2159 goto out1;
2160 }
2161
	read_lock(&current->fs->lock);
	root = current->fs->root;
	path_get(&current->fs->root);
	read_unlock(&current->fs->lock);
2166 down_write(&namespace_sem);
2167 mutex_lock(&old.dentry->d_inode->i_mutex);
2168 error = -EINVAL;
2169 if (IS_MNT_SHARED(old.mnt) ||
2170 IS_MNT_SHARED(new.mnt->mnt_parent) ||
2171 IS_MNT_SHARED(root.mnt->mnt_parent))
2172 goto out2;
2173 if (!check_mnt(root.mnt))
2174 goto out2;
2175 error = -ENOENT;
2176 if (IS_DEADDIR(new.dentry->d_inode))
2177 goto out2;
2178 if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
2179 goto out2;
2180 if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
2181 goto out2;
2182 error = -EBUSY;
2183 if (new.mnt == root.mnt ||
2184 old.mnt == root.mnt)
2185 goto out2;
2186 error = -EINVAL;
2187 if (root.mnt->mnt_root != root.dentry)
2188 goto out2;
2189 if (root.mnt->mnt_parent == root.mnt)
2190 goto out2;
2191 if (new.mnt->mnt_root != new.dentry)
2192 goto out2;
2193 if (new.mnt->mnt_parent == new.mnt)
2194 goto out2;
2195
2196 tmp = old.mnt;
2197 spin_lock(&vfsmount_lock);
2198 if (tmp != new.mnt) {
2199 for (;;) {
2200 if (tmp->mnt_parent == tmp)
2201 goto out3;
2202 if (tmp->mnt_parent == new.mnt)
2203 break;
2204 tmp = tmp->mnt_parent;
2205 }
2206 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2207 goto out3;
2208 } else if (!is_subdir(old.dentry, new.dentry))
2209 goto out3;
2210 detach_mnt(new.mnt, &parent_path);
2211 detach_mnt(root.mnt, &root_parent);
	/* mount old root on put_old */
2213 attach_mnt(root.mnt, &old);
	/* mount new_root on / */
2215 attach_mnt(new.mnt, &root_parent);
2216 touch_mnt_namespace(current->nsproxy->mnt_ns);
2217 spin_unlock(&vfsmount_lock);
2218 chroot_fs_refs(&root, &new);
2219 security_sb_post_pivotroot(&root, &new);
2220 error = 0;
2221 path_put(&root_parent);
2222 path_put(&parent_path);
2223out2:
2224 mutex_unlock(&old.dentry->d_inode->i_mutex);
2225 up_write(&namespace_sem);
2226 path_put(&root);
2227 path_put(&old);
2228out1:
2229 path_put(&new);
2230out0:
2231 return error;
2232out3:
2233 spin_unlock(&vfsmount_lock);
2234 goto out2;
2235}
2236
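/*
 * Mount rootfs, create the initial mount namespace around it and make
 * it the root and cwd of the init task.
 */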
2237static void __init init_mount_tree(void)
2238{
2239 struct vfsmount *mnt;
2240 struct mnt_namespace *ns;
2241 struct path root;
2242
2243 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2244 if (IS_ERR(mnt))
2245 panic("Can't create rootfs");
2246 ns = kmalloc(sizeof(*ns), GFP_KERNEL);
2247 if (!ns)
2248 panic("Can't allocate initial namespace");
2249 atomic_set(&ns->count, 1);
2250 INIT_LIST_HEAD(&ns->list);
2251 init_waitqueue_head(&ns->poll);
2252 ns->event = 0;
2253 list_add(&mnt->mnt_list, &ns->list);
2254 ns->root = mnt;
2255 mnt->mnt_ns = ns;
2256
2257 init_task.nsproxy->mnt_ns = ns;
2258 get_mnt_ns(ns);
2259
2260 root.mnt = ns->root;
2261 root.dentry = ns->root->mnt_root;
2262
2263 set_fs_pwd(current->fs, &root);
2264 set_fs_root(current->fs, &root);
2265}
2266
2267void __init mnt_init(void)
2268{
2269 unsigned u;
2270 int err;
2271
2272 init_rwsem(&namespace_sem);
2273
2274 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
2275 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2276
2277 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2278
2279 if (!mount_hashtable)
2280 panic("Failed to allocate mount hash table\n");
2281
2282 printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);
2283
2284 for (u = 0; u < HASH_SIZE; u++)
2285 INIT_LIST_HEAD(&mount_hashtable[u]);
2286
2287 err = sysfs_init();
2288 if (err)
2289 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2290 __func__, err);
2291 fs_kobj = kobject_create_and_add("fs", NULL);
2292 if (!fs_kobj)
2293 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2294 init_rootfs();
2295 init_mount_tree();
2296}
2297
2298void __put_mnt_ns(struct mnt_namespace *ns)
2299{
2300 struct vfsmount *root = ns->root;
2301 LIST_HEAD(umount_list);
2302 ns->root = NULL;
2303 spin_unlock(&vfsmount_lock);
2304 down_write(&namespace_sem);
2305 spin_lock(&vfsmount_lock);
2306 umount_tree(root, 0, &umount_list);
2307 spin_unlock(&vfsmount_lock);
2308 up_write(&namespace_sem);
2309 release_mounts(&umount_list);
2310 kfree(ns);
2311}
2312