1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/export.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30#include <linux/perf_event.h>
31#include <linux/audit.h>
32#include <linux/khugepaged.h>
33#include <linux/uprobes.h>
34
35#include <asm/uaccess.h>
36#include <asm/cacheflush.h>
37#include <asm/tlb.h>
38#include <asm/mmu_context.h>
39
40#include "internal.h"
41
/*
 * Fallback for architectures that do not provide their own mmap sanity
 * check: every request is accepted (evaluates to 0).
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

/*
 * Fallback for architectures that do not rebalance page tables: the address
 * chosen by get_unmapped_area() is returned unchanged.
 */
#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)		(addr)
#endif

/*
 * Forward declaration: mmap_region() calls this on its error path.  The
 * function is static, so its definition must live elsewhere in this file
 * (it is not part of the section shown here).
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/*
 * Base page protections.  vm_get_page_prot() indexes this table with
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED), so there is one entry per
 * combination of those four bits.  The first row holds the __Pxxx values and
 * the second the __Sxxx values; presumably the architecture's "private" and
 * "shared" protections (their definitions are not in this file).
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
73
74pgprot_t vm_get_page_prot(unsigned long vm_flags)
75{
76 return __pgprot(pgprot_val(protection_map[vm_flags &
77 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
78 pgprot_val(arch_vm_get_page_prot(vm_flags)));
79}
80EXPORT_SYMBOL(vm_get_page_prot);
81
/* Overcommit policy consulted by __vm_enough_memory(); starts in GUESS mode. */
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
/*
 * Percentage of (total RAM - hugetlb pages) that __vm_enough_memory() allows
 * to be committed when the policy is neither ALWAYS nor GUESS.
 */
int sysctl_overcommit_ratio __read_mostly = 50;
/* Limit that do_mmap_pgoff() compares mm->map_count against. */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;

/*
 * Counter that __vm_enough_memory() reads to decide whether the commit limit
 * has been reached.  Presumably maintained by vm_acct_memory() and
 * vm_unacct_memory(), which are not defined in this file.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
108{
109 unsigned long free, allowed;
110
111 vm_acct_memory(pages);
112
113
114
115
116 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
117 return 0;
118
119 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
120 free = global_page_state(NR_FREE_PAGES);
121 free += global_page_state(NR_FILE_PAGES);
122
123
124
125
126
127
128
129 free -= global_page_state(NR_SHMEM);
130
131 free += nr_swap_pages;
132
133
134
135
136
137
138
139 free += global_page_state(NR_SLAB_RECLAIMABLE);
140
141
142
143
144 if (free <= totalreserve_pages)
145 goto error;
146 else
147 free -= totalreserve_pages;
148
149
150
151
152 if (!cap_sys_admin)
153 free -= free / 32;
154
155 if (free > pages)
156 return 0;
157
158 goto error;
159 }
160
161 allowed = (totalram_pages - hugetlb_total_pages())
162 * sysctl_overcommit_ratio / 100;
163
164
165
166 if (!cap_sys_admin)
167 allowed -= allowed / 32;
168 allowed += total_swap_pages;
169
170
171
172 if (mm)
173 allowed -= mm->total_vm / 32;
174
175 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
176 return 0;
177error:
178 vm_unacct_memory(pages);
179
180 return -ENOMEM;
181}
182
183
184
185
186static void __remove_shared_vm_struct(struct vm_area_struct *vma,
187 struct file *file, struct address_space *mapping)
188{
189 if (vma->vm_flags & VM_DENYWRITE)
190 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
191 if (vma->vm_flags & VM_SHARED)
192 mapping->i_mmap_writable--;
193
194 flush_dcache_mmap_lock(mapping);
195 if (unlikely(vma->vm_flags & VM_NONLINEAR))
196 list_del_init(&vma->shared.nonlinear);
197 else
198 vma_interval_tree_remove(vma, &mapping->i_mmap);
199 flush_dcache_mmap_unlock(mapping);
200}
201
202
203
204
205
206void unlink_file_vma(struct vm_area_struct *vma)
207{
208 struct file *file = vma->vm_file;
209
210 if (file) {
211 struct address_space *mapping = file->f_mapping;
212 mutex_lock(&mapping->i_mmap_mutex);
213 __remove_shared_vm_struct(vma, file, mapping);
214 mutex_unlock(&mapping->i_mmap_mutex);
215 }
216}
217
218
219
220
221static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
222{
223 struct vm_area_struct *next = vma->vm_next;
224
225 might_sleep();
226 if (vma->vm_ops && vma->vm_ops->close)
227 vma->vm_ops->close(vma);
228 if (vma->vm_file)
229 fput(vma->vm_file);
230 mpol_put(vma_policy(vma));
231 kmem_cache_free(vm_area_cachep, vma);
232 return next;
233}
234
/*
 * Forward declaration: the brk syscall below uses this to grow the heap and
 * treats a return value equal to the requested address as success.  The
 * definition is not part of the section shown here.
 */
static unsigned long do_brk(unsigned long addr, unsigned long len);
236
237SYSCALL_DEFINE1(brk, unsigned long, brk)
238{
239 unsigned long rlim, retval;
240 unsigned long newbrk, oldbrk;
241 struct mm_struct *mm = current->mm;
242 unsigned long min_brk;
243
244 down_write(&mm->mmap_sem);
245
246#ifdef CONFIG_COMPAT_BRK
247
248
249
250
251
252 if (current->brk_randomized)
253 min_brk = mm->start_brk;
254 else
255 min_brk = mm->end_data;
256#else
257 min_brk = mm->start_brk;
258#endif
259 if (brk < min_brk)
260 goto out;
261
262
263
264
265
266
267
268 rlim = rlimit(RLIMIT_DATA);
269 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
270 (mm->end_data - mm->start_data) > rlim)
271 goto out;
272
273 newbrk = PAGE_ALIGN(brk);
274 oldbrk = PAGE_ALIGN(mm->brk);
275 if (oldbrk == newbrk)
276 goto set_brk;
277
278
279 if (brk <= mm->brk) {
280 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
281 goto set_brk;
282 goto out;
283 }
284
285
286 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
287 goto out;
288
289
290 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
291 goto out;
292set_brk:
293 mm->brk = brk;
294out:
295 retval = mm->brk;
296 up_write(&mm->mmap_sem);
297 return retval;
298}
299
#ifdef CONFIG_DEBUG_VM_RB
/*
 * Walk the vma rbtree forwards, reporting ordering problems, then walk it
 * backwards from the last node and compare the two node counts.
 *
 * Returns the forward count.  An out-of-order start address resets the
 * running count to -1 before that node is counted, and a forward/backward
 * mismatch makes the function return 0.
 */
static int browse_rb(struct rb_root *root)
{
	struct rb_node *node, *last = NULL;
	unsigned long prev_start = 0, prev_end = 0;
	int fwd = 0, back = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct vm_area_struct *vma =
			rb_entry(node, struct vm_area_struct, vm_rb);

		if (vma->vm_start < prev_start) {
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev_start);
			fwd = -1;
		}
		if (vma->vm_start < prev_end) {
			printk("vm_start %lx pend %lx\n", vma->vm_start, prev_end);
		}
		if (vma->vm_start > vma->vm_end) {
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		}
		fwd++;
		last = node;
		prev_start = vma->vm_start;
		prev_end = vma->vm_end;
	}

	for (node = last; node; node = rb_prev(node))
		back++;

	if (fwd != back) {
		printk("backwards %d, forwards %d\n", back, fwd);
		fwd = 0;
	}
	return fwd;
}

/*
 * Consistency check of an mm's vma bookkeeping: verifies every anon_vma
 * chain entry, then checks that both the linked list and the rbtree contain
 * exactly mm->map_count vmas.  Any mismatch is printed and ends in BUG().
 */
void validate_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int listed = 0;
	int in_tree;
	int bug = 0;

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		struct anon_vma_chain *avc;

		vma_lock_anon_vma(vma);
		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
			anon_vma_interval_tree_verify(avc);
		vma_unlock_anon_vma(vma);
		listed++;
	}
	if (listed != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, listed);
		bug = 1;
	}

	in_tree = browse_rb(&mm->mm_rb);
	if (in_tree != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, in_tree);
		bug = 1;
	}
	BUG_ON(bug);
}
#else
/* Without CONFIG_DEBUG_VM_RB the check compiles away. */
#define validate_mm(mm) do { } while (0)
#endif
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369static inline void
370anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
371{
372 struct anon_vma_chain *avc;
373
374 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
375 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
376}
377
378static inline void
379anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
380{
381 struct anon_vma_chain *avc;
382
383 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
384 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
385}
386
387static int find_vma_links(struct mm_struct *mm, unsigned long addr,
388 unsigned long end, struct vm_area_struct **pprev,
389 struct rb_node ***rb_link, struct rb_node **rb_parent)
390{
391 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
392
393 __rb_link = &mm->mm_rb.rb_node;
394 rb_prev = __rb_parent = NULL;
395
396 while (*__rb_link) {
397 struct vm_area_struct *vma_tmp;
398
399 __rb_parent = *__rb_link;
400 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
401
402 if (vma_tmp->vm_end > addr) {
403
404 if (vma_tmp->vm_start < end)
405 return -ENOMEM;
406 __rb_link = &__rb_parent->rb_left;
407 } else {
408 rb_prev = __rb_parent;
409 __rb_link = &__rb_parent->rb_right;
410 }
411 }
412
413 *pprev = NULL;
414 if (rb_prev)
415 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
416 *rb_link = __rb_link;
417 *rb_parent = __rb_parent;
418 return 0;
419}
420
421void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
422 struct rb_node **rb_link, struct rb_node *rb_parent)
423{
424 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
425 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
426}
427
428static void __vma_link_file(struct vm_area_struct *vma)
429{
430 struct file *file;
431
432 file = vma->vm_file;
433 if (file) {
434 struct address_space *mapping = file->f_mapping;
435
436 if (vma->vm_flags & VM_DENYWRITE)
437 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
438 if (vma->vm_flags & VM_SHARED)
439 mapping->i_mmap_writable++;
440
441 flush_dcache_mmap_lock(mapping);
442 if (unlikely(vma->vm_flags & VM_NONLINEAR))
443 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
444 else
445 vma_interval_tree_insert(vma, &mapping->i_mmap);
446 flush_dcache_mmap_unlock(mapping);
447 }
448}
449
/*
 * Link @vma into both per-mm structures: the vma list (after @prev) and the
 * rbtree (at the slot given by @rb_link / @rb_parent).  It does not touch
 * mm->map_count or any file mapping; the callers in this file, vma_link()
 * and __insert_vm_struct(), take care of those themselves.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	/* List first: it is handed rb_parent before the tree is modified. */
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}
458
459static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
460 struct vm_area_struct *prev, struct rb_node **rb_link,
461 struct rb_node *rb_parent)
462{
463 struct address_space *mapping = NULL;
464
465 if (vma->vm_file)
466 mapping = vma->vm_file->f_mapping;
467
468 if (mapping)
469 mutex_lock(&mapping->i_mmap_mutex);
470
471 __vma_link(mm, vma, prev, rb_link, rb_parent);
472 __vma_link_file(vma);
473
474 if (mapping)
475 mutex_unlock(&mapping->i_mmap_mutex);
476
477 mm->map_count++;
478 validate_mm(mm);
479}
480
481
482
483
484
485static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
486{
487 struct vm_area_struct *prev;
488 struct rb_node **rb_link, *rb_parent;
489
490 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
491 &prev, &rb_link, &rb_parent))
492 BUG();
493 __vma_link(mm, vma, prev, rb_link, rb_parent);
494 mm->map_count++;
495}
496
497static inline void
498__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
499 struct vm_area_struct *prev)
500{
501 struct vm_area_struct *next = vma->vm_next;
502
503 prev->vm_next = next;
504 if (next)
505 next->vm_prev = prev;
506 rb_erase(&vma->vm_rb, &mm->mm_rb);
507 if (mm->mmap_cache == vma)
508 mm->mmap_cache = prev;
509}
510
511
512
513
514
515
516
517
/*
 * vma_adjust - change the range and file offset of an existing vma.
 * @vma:    vma to modify.
 * @start:  new vm_start.
 * @end:    new vm_end.
 * @pgoff:  new vm_pgoff.
 * @insert: optional new vma to link into the mm once @vma has been resized;
 *          when given, the neighbouring vma is left alone.
 *
 * When @insert is NULL and the new end reaches into or past the following
 * vma, that neighbour is trimmed or removed (up to two neighbours can be
 * removed).  When @vma shrinks from the end, the neighbour is extended
 * downwards instead.
 *
 * Returns 0 on success or -ENOMEM if cloning the anon_vma chain fails; in
 * that case nothing has been modified yet.
 */
int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct rb_root *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma grows over all of next: next will be removed.
			 * remove_next becomes 2 when the new end lies beyond
			 * next, meaning the vma after it goes too; that second
			 * pass re-enters here through the "again" label.
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			exporter = next;
			importer = vma;
		} else if (end > next->vm_start) {
			/*
			 * vma grows into part of next: next's start moves up
			 * by adjust_next pages.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			exporter = next;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks: next's start moves down to take over
			 * the released pages, so adjust_next is negative.
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			exporter = vma;
			importer = next;
		}

		/*
		 * The vma that gains pages (importer) needs an anon_vma if
		 * the one losing them (exporter) has one; clone the chain
		 * before anything is modified so failure is harmless.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			if (anon_vma_clone(importer, exporter))
				return -ENOMEM;
			importer->anon_vma = exporter->anon_vma;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		if (!(vma->vm_flags & VM_NONLINEAR)) {
			/* Only linear mappings sit in the i_mmap interval tree. */
			root = &mapping->i_mmap;
			uprobe_munmap(vma, vma->vm_start, vma->vm_end);

			if (adjust_next)
				uprobe_munmap(next, next->vm_start,
							next->vm_end);
		}

		mutex_lock(&mapping->i_mmap_mutex);
		if (insert) {
			/*
			 * Link the new vma into the file's mapping now, under
			 * i_mmap_mutex; it joins the mm's list and tree later
			 * via __insert_vm_struct().
			 */
			__vma_link_file(insert);
		}
	}

	vma_adjust_trans_huge(vma, start, end, adjust_next);

	/* Use vma's anon_vma, or next's when only next has one and it changes. */
	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_BUG_ON(adjust_next && next->anon_vma &&
			  anon_vma != next->anon_vma);
		anon_vma_lock(anon_vma);
		/* Take the chains out of the interval trees while ranges change. */
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	if (root) {
		/* Same for the file interval tree: remove, update, re-insert. */
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	/* The actual range update. */
	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * next is now fully covered by vma: unlink it from the mm and
		 * from the file mapping.  It is freed further down, after the
		 * locks have been dropped.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * vma has been resized; the new vma can now be linked into
		 * the mm's list and tree.
		 */
		__insert_vm_struct(mm, insert);
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock(anon_vma);
	}
	if (mapping)
		mutex_unlock(&mapping->i_mmap_mutex);

	if (root) {
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		/* Release everything the removed vma was holding, then free it. */
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * A second neighbour still has to go: reload next and jump
		 * back into the removal branch above.
		 */
		if (remove_next == 2) {
			next = vma->vm_next;
			goto again;
		}
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}
693
694
695
696
697
698static inline int is_mergeable_vma(struct vm_area_struct *vma,
699 struct file *file, unsigned long vm_flags)
700{
701 if (vma->vm_flags ^ vm_flags)
702 return 0;
703 if (vma->vm_file != file)
704 return 0;
705 if (vma->vm_ops && vma->vm_ops->close)
706 return 0;
707 return 1;
708}
709
710static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
711 struct anon_vma *anon_vma2,
712 struct vm_area_struct *vma)
713{
714
715
716
717
718 if ((!anon_vma1 || !anon_vma2) && (!vma ||
719 list_is_singular(&vma->anon_vma_chain)))
720 return 1;
721 return anon_vma1 == anon_vma2;
722}
723
724
725
726
727
728
729
730
731
732
733
734
735static int
736can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
737 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
738{
739 if (is_mergeable_vma(vma, file, vm_flags) &&
740 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
741 if (vma->vm_pgoff == vm_pgoff)
742 return 1;
743 }
744 return 0;
745}
746
747
748
749
750
751
752
753
754static int
755can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
756 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
757{
758 if (is_mergeable_vma(vma, file, vm_flags) &&
759 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
760 pgoff_t vm_pglen;
761 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
762 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
763 return 1;
764 }
765 return 0;
766}
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797struct vm_area_struct *vma_merge(struct mm_struct *mm,
798 struct vm_area_struct *prev, unsigned long addr,
799 unsigned long end, unsigned long vm_flags,
800 struct anon_vma *anon_vma, struct file *file,
801 pgoff_t pgoff, struct mempolicy *policy)
802{
803 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
804 struct vm_area_struct *area, *next;
805 int err;
806
807
808
809
810
811 if (vm_flags & VM_SPECIAL)
812 return NULL;
813
814 if (prev)
815 next = prev->vm_next;
816 else
817 next = mm->mmap;
818 area = next;
819 if (next && next->vm_end == end)
820 next = next->vm_next;
821
822
823
824
825 if (prev && prev->vm_end == addr &&
826 mpol_equal(vma_policy(prev), policy) &&
827 can_vma_merge_after(prev, vm_flags,
828 anon_vma, file, pgoff)) {
829
830
831
832 if (next && end == next->vm_start &&
833 mpol_equal(policy, vma_policy(next)) &&
834 can_vma_merge_before(next, vm_flags,
835 anon_vma, file, pgoff+pglen) &&
836 is_mergeable_anon_vma(prev->anon_vma,
837 next->anon_vma, NULL)) {
838
839 err = vma_adjust(prev, prev->vm_start,
840 next->vm_end, prev->vm_pgoff, NULL);
841 } else
842 err = vma_adjust(prev, prev->vm_start,
843 end, prev->vm_pgoff, NULL);
844 if (err)
845 return NULL;
846 khugepaged_enter_vma_merge(prev);
847 return prev;
848 }
849
850
851
852
853 if (next && end == next->vm_start &&
854 mpol_equal(policy, vma_policy(next)) &&
855 can_vma_merge_before(next, vm_flags,
856 anon_vma, file, pgoff+pglen)) {
857 if (prev && addr < prev->vm_end)
858 err = vma_adjust(prev, prev->vm_start,
859 addr, prev->vm_pgoff, NULL);
860 else
861 err = vma_adjust(area, addr, next->vm_end,
862 next->vm_pgoff - pglen, NULL);
863 if (err)
864 return NULL;
865 khugepaged_enter_vma_merge(area);
866 return area;
867 }
868
869 return NULL;
870}
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
886{
887 return a->vm_end == b->vm_start &&
888 mpol_equal(vma_policy(a), vma_policy(b)) &&
889 a->vm_file == b->vm_file &&
890 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
891 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
892}
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
917{
918 if (anon_vma_compatible(a, b)) {
919 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
920
921 if (anon_vma && list_is_singular(&old->anon_vma_chain))
922 return anon_vma;
923 }
924 return NULL;
925}
926
927
928
929
930
931
932
933
934
935struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
936{
937 struct anon_vma *anon_vma;
938 struct vm_area_struct *near;
939
940 near = vma->vm_next;
941 if (!near)
942 goto try_prev;
943
944 anon_vma = reusable_anon_vma(near, vma, near);
945 if (anon_vma)
946 return anon_vma;
947try_prev:
948 near = vma->vm_prev;
949 if (!near)
950 goto none;
951
952 anon_vma = reusable_anon_vma(near, near, vma);
953 if (anon_vma)
954 return anon_vma;
955none:
956
957
958
959
960
961
962
963
964 return NULL;
965}
966
967#ifdef CONFIG_PROC_FS
968void vm_stat_account(struct mm_struct *mm, unsigned long flags,
969 struct file *file, long pages)
970{
971 const unsigned long stack_flags
972 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
973
974 mm->total_vm += pages;
975
976 if (file) {
977 mm->shared_vm += pages;
978 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
979 mm->exec_vm += pages;
980 } else if (flags & stack_flags)
981 mm->stack_vm += pages;
982}
983#endif
984
985
986
987
988
989static inline unsigned long round_hint_to_min(unsigned long hint)
990{
991 hint &= PAGE_MASK;
992 if (((void *)hint != NULL) &&
993 (hint < mmap_min_addr))
994 return PAGE_ALIGN(mmap_min_addr);
995 return hint;
996}
997
998
999
1000
1001
1002unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1003 unsigned long len, unsigned long prot,
1004 unsigned long flags, unsigned long pgoff)
1005{
1006 struct mm_struct * mm = current->mm;
1007 struct inode *inode;
1008 vm_flags_t vm_flags;
1009
1010
1011
1012
1013
1014
1015
1016 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1017 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
1018 prot |= PROT_EXEC;
1019
1020 if (!len)
1021 return -EINVAL;
1022
1023 if (!(flags & MAP_FIXED))
1024 addr = round_hint_to_min(addr);
1025
1026
1027 len = PAGE_ALIGN(len);
1028 if (!len)
1029 return -ENOMEM;
1030
1031
1032 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1033 return -EOVERFLOW;
1034
1035
1036 if (mm->map_count > sysctl_max_map_count)
1037 return -ENOMEM;
1038
1039
1040
1041
1042 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1043 if (addr & ~PAGE_MASK)
1044 return addr;
1045
1046
1047
1048
1049
1050 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1051 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1052
1053 if (flags & MAP_LOCKED)
1054 if (!can_do_mlock())
1055 return -EPERM;
1056
1057
1058 if (vm_flags & VM_LOCKED) {
1059 unsigned long locked, lock_limit;
1060 locked = len >> PAGE_SHIFT;
1061 locked += mm->locked_vm;
1062 lock_limit = rlimit(RLIMIT_MEMLOCK);
1063 lock_limit >>= PAGE_SHIFT;
1064 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1065 return -EAGAIN;
1066 }
1067
1068 inode = file ? file->f_path.dentry->d_inode : NULL;
1069
1070 if (file) {
1071 switch (flags & MAP_TYPE) {
1072 case MAP_SHARED:
1073 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1074 return -EACCES;
1075
1076
1077
1078
1079
1080 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1081 return -EACCES;
1082
1083
1084
1085
1086 if (locks_verify_locked(inode))
1087 return -EAGAIN;
1088
1089 vm_flags |= VM_SHARED | VM_MAYSHARE;
1090 if (!(file->f_mode & FMODE_WRITE))
1091 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1092
1093
1094 case MAP_PRIVATE:
1095 if (!(file->f_mode & FMODE_READ))
1096 return -EACCES;
1097 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1098 if (vm_flags & VM_EXEC)
1099 return -EPERM;
1100 vm_flags &= ~VM_MAYEXEC;
1101 }
1102
1103 if (!file->f_op || !file->f_op->mmap)
1104 return -ENODEV;
1105 break;
1106
1107 default:
1108 return -EINVAL;
1109 }
1110 } else {
1111 switch (flags & MAP_TYPE) {
1112 case MAP_SHARED:
1113
1114
1115
1116 pgoff = 0;
1117 vm_flags |= VM_SHARED | VM_MAYSHARE;
1118 break;
1119 case MAP_PRIVATE:
1120
1121
1122
1123 pgoff = addr >> PAGE_SHIFT;
1124 break;
1125 default:
1126 return -EINVAL;
1127 }
1128 }
1129
1130 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1131}
1132
1133SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1134 unsigned long, prot, unsigned long, flags,
1135 unsigned long, fd, unsigned long, pgoff)
1136{
1137 struct file *file = NULL;
1138 unsigned long retval = -EBADF;
1139
1140 if (!(flags & MAP_ANONYMOUS)) {
1141 audit_mmap_fd(fd, flags);
1142 if (unlikely(flags & MAP_HUGETLB))
1143 return -EINVAL;
1144 file = fget(fd);
1145 if (!file)
1146 goto out;
1147 } else if (flags & MAP_HUGETLB) {
1148 struct user_struct *user = NULL;
1149
1150
1151
1152
1153
1154
1155 file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
1156 VM_NORESERVE, &user,
1157 HUGETLB_ANONHUGE_INODE);
1158 if (IS_ERR(file))
1159 return PTR_ERR(file);
1160 }
1161
1162 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1163
1164 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1165 if (file)
1166 fput(file);
1167out:
1168 return retval;
1169}
1170
#ifdef __ARCH_WANT_SYS_OLD_MMAP
/* Argument block that the old-style mmap call receives by pointer. */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;	/* in bytes; must be page-aligned */
};

/*
 * old_mmap - mmap variant that takes its arguments from a user-space
 * structure.  Returns -EFAULT if the structure cannot be copied in and
 * -EINVAL if the byte offset is not page-aligned; otherwise forwards to
 * sys_mmap_pgoff() with the offset converted to pages.
 */
SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct args;
	unsigned long pgoff;

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;
	if (args.offset & ~PAGE_MASK)
		return -EINVAL;

	pgoff = args.offset >> PAGE_SHIFT;
	return sys_mmap_pgoff(args.addr, args.len, args.prot, args.flags,
			      args.fd, pgoff);
}
#endif
1194
1195
1196
1197
1198
1199
1200
1201int vma_wants_writenotify(struct vm_area_struct *vma)
1202{
1203 vm_flags_t vm_flags = vma->vm_flags;
1204
1205
1206 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1207 return 0;
1208
1209
1210 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1211 return 1;
1212
1213
1214 if (pgprot_val(vma->vm_page_prot) !=
1215 pgprot_val(vm_get_page_prot(vm_flags)))
1216 return 0;
1217
1218
1219 if (vm_flags & VM_PFNMAP)
1220 return 0;
1221
1222
1223 return vma->vm_file && vma->vm_file->f_mapping &&
1224 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1225}
1226
1227
1228
1229
1230
1231static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1232{
1233
1234
1235
1236
1237 if (file && is_file_hugepages(file))
1238 return 0;
1239
1240 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1241}
1242
1243unsigned long mmap_region(struct file *file, unsigned long addr,
1244 unsigned long len, unsigned long flags,
1245 vm_flags_t vm_flags, unsigned long pgoff)
1246{
1247 struct mm_struct *mm = current->mm;
1248 struct vm_area_struct *vma, *prev;
1249 int correct_wcount = 0;
1250 int error;
1251 struct rb_node **rb_link, *rb_parent;
1252 unsigned long charged = 0;
1253 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1254
1255
1256 error = -ENOMEM;
1257munmap_back:
1258 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
1259 if (do_munmap(mm, addr, len))
1260 return -ENOMEM;
1261 goto munmap_back;
1262 }
1263
1264
1265 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1266 return -ENOMEM;
1267
1268
1269
1270
1271
1272 if ((flags & MAP_NORESERVE)) {
1273
1274 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1275 vm_flags |= VM_NORESERVE;
1276
1277
1278 if (file && is_file_hugepages(file))
1279 vm_flags |= VM_NORESERVE;
1280 }
1281
1282
1283
1284
1285 if (accountable_mapping(file, vm_flags)) {
1286 charged = len >> PAGE_SHIFT;
1287 if (security_vm_enough_memory_mm(mm, charged))
1288 return -ENOMEM;
1289 vm_flags |= VM_ACCOUNT;
1290 }
1291
1292
1293
1294
1295 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1296 if (vma)
1297 goto out;
1298
1299
1300
1301
1302
1303
1304 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1305 if (!vma) {
1306 error = -ENOMEM;
1307 goto unacct_error;
1308 }
1309
1310 vma->vm_mm = mm;
1311 vma->vm_start = addr;
1312 vma->vm_end = addr + len;
1313 vma->vm_flags = vm_flags;
1314 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1315 vma->vm_pgoff = pgoff;
1316 INIT_LIST_HEAD(&vma->anon_vma_chain);
1317
1318 error = -EINVAL;
1319
1320 if (file) {
1321 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1322 goto free_vma;
1323 if (vm_flags & VM_DENYWRITE) {
1324 error = deny_write_access(file);
1325 if (error)
1326 goto free_vma;
1327 correct_wcount = 1;
1328 }
1329 vma->vm_file = get_file(file);
1330 error = file->f_op->mmap(file, vma);
1331 if (error)
1332 goto unmap_and_free_vma;
1333
1334
1335
1336
1337
1338
1339 addr = vma->vm_start;
1340 pgoff = vma->vm_pgoff;
1341 vm_flags = vma->vm_flags;
1342 } else if (vm_flags & VM_SHARED) {
1343 if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
1344 goto free_vma;
1345 error = shmem_zero_setup(vma);
1346 if (error)
1347 goto free_vma;
1348 }
1349
1350 if (vma_wants_writenotify(vma)) {
1351 pgprot_t pprot = vma->vm_page_prot;
1352
1353
1354
1355
1356
1357
1358
1359
1360 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1361 if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
1362 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1363 }
1364
1365 vma_link(mm, vma, prev, rb_link, rb_parent);
1366 file = vma->vm_file;
1367
1368
1369 if (correct_wcount)
1370 atomic_inc(&inode->i_writecount);
1371out:
1372 perf_event_mmap(vma);
1373
1374 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1375 if (vm_flags & VM_LOCKED) {
1376 if (!mlock_vma_pages_range(vma, addr, addr + len))
1377 mm->locked_vm += (len >> PAGE_SHIFT);
1378 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1379 make_pages_present(addr, addr + len);
1380
1381 if (file)
1382 uprobe_mmap(vma);
1383
1384 return addr;
1385
1386unmap_and_free_vma:
1387 if (correct_wcount)
1388 atomic_inc(&inode->i_writecount);
1389 vma->vm_file = NULL;
1390 fput(file);
1391
1392
1393 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1394 charged = 0;
1395free_vma:
1396 kmem_cache_free(vm_area_cachep, vma);
1397unacct_error:
1398 if (charged)
1399 vm_unacct_memory(charged);
1400 return error;
1401}
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414#ifndef HAVE_ARCH_UNMAPPED_AREA
1415unsigned long
1416arch_get_unmapped_area(struct file *filp, unsigned long addr,
1417 unsigned long len, unsigned long pgoff, unsigned long flags)
1418{
1419 struct mm_struct *mm = current->mm;
1420 struct vm_area_struct *vma;
1421 unsigned long start_addr;
1422
1423 if (len > TASK_SIZE)
1424 return -ENOMEM;
1425
1426 if (flags & MAP_FIXED)
1427 return addr;
1428
1429 if (addr) {
1430 addr = PAGE_ALIGN(addr);
1431 vma = find_vma(mm, addr);
1432 if (TASK_SIZE - len >= addr &&
1433 (!vma || addr + len <= vma->vm_start))
1434 return addr;
1435 }
1436 if (len > mm->cached_hole_size) {
1437 start_addr = addr = mm->free_area_cache;
1438 } else {
1439 start_addr = addr = TASK_UNMAPPED_BASE;
1440 mm->cached_hole_size = 0;
1441 }
1442
1443full_search:
1444 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1445
1446 if (TASK_SIZE - len < addr) {
1447
1448
1449
1450
1451 if (start_addr != TASK_UNMAPPED_BASE) {
1452 addr = TASK_UNMAPPED_BASE;
1453 start_addr = addr;
1454 mm->cached_hole_size = 0;
1455 goto full_search;
1456 }
1457 return -ENOMEM;
1458 }
1459 if (!vma || addr + len <= vma->vm_start) {
1460
1461
1462
1463 mm->free_area_cache = addr + len;
1464 return addr;
1465 }
1466 if (addr + mm->cached_hole_size < vma->vm_start)
1467 mm->cached_hole_size = vma->vm_start - addr;
1468 addr = vma->vm_end;
1469 }
1470}
1471#endif
1472
1473void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1474{
1475
1476
1477
1478 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
1479 mm->free_area_cache = addr;
1480}
1481
1482
1483
1484
1485
1486#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1487unsigned long
1488arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1489 const unsigned long len, const unsigned long pgoff,
1490 const unsigned long flags)
1491{
1492 struct vm_area_struct *vma;
1493 struct mm_struct *mm = current->mm;
1494 unsigned long addr = addr0, start_addr;
1495
1496
1497 if (len > TASK_SIZE)
1498 return -ENOMEM;
1499
1500 if (flags & MAP_FIXED)
1501 return addr;
1502
1503
1504 if (addr) {
1505 addr = PAGE_ALIGN(addr);
1506 vma = find_vma(mm, addr);
1507 if (TASK_SIZE - len >= addr &&
1508 (!vma || addr + len <= vma->vm_start))
1509 return addr;
1510 }
1511
1512
1513 if (len <= mm->cached_hole_size) {
1514 mm->cached_hole_size = 0;
1515 mm->free_area_cache = mm->mmap_base;
1516 }
1517
1518try_again:
1519
1520 start_addr = addr = mm->free_area_cache;
1521
1522 if (addr < len)
1523 goto fail;
1524
1525 addr -= len;
1526 do {
1527
1528
1529
1530
1531
1532 vma = find_vma(mm, addr);
1533 if (!vma || addr+len <= vma->vm_start)
1534
1535 return (mm->free_area_cache = addr);
1536
1537
1538 if (addr + mm->cached_hole_size < vma->vm_start)
1539 mm->cached_hole_size = vma->vm_start - addr;
1540
1541
1542 addr = vma->vm_start-len;
1543 } while (len < vma->vm_start);
1544
1545fail:
1546
1547
1548
1549
1550
1551
1552
1553
1554 if (start_addr != mm->mmap_base) {
1555 mm->free_area_cache = mm->mmap_base;
1556 mm->cached_hole_size = 0;
1557 goto try_again;
1558 }
1559
1560
1561
1562
1563
1564
1565
1566 mm->cached_hole_size = ~0UL;
1567 mm->free_area_cache = TASK_UNMAPPED_BASE;
1568 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1569
1570
1571
1572 mm->free_area_cache = mm->mmap_base;
1573 mm->cached_hole_size = ~0UL;
1574
1575 return addr;
1576}
1577#endif
1578
1579void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1580{
1581
1582
1583
1584 if (addr > mm->free_area_cache)
1585 mm->free_area_cache = addr;
1586
1587
1588 if (mm->free_area_cache > mm->mmap_base)
1589 mm->free_area_cache = mm->mmap_base;
1590}
1591
1592unsigned long
1593get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1594 unsigned long pgoff, unsigned long flags)
1595{
1596 unsigned long (*get_area)(struct file *, unsigned long,
1597 unsigned long, unsigned long, unsigned long);
1598
1599 unsigned long error = arch_mmap_check(addr, len, flags);
1600 if (error)
1601 return error;
1602
1603
1604 if (len > TASK_SIZE)
1605 return -ENOMEM;
1606
1607 get_area = current->mm->get_unmapped_area;
1608 if (file && file->f_op && file->f_op->get_unmapped_area)
1609 get_area = file->f_op->get_unmapped_area;
1610 addr = get_area(file, addr, len, pgoff, flags);
1611 if (IS_ERR_VALUE(addr))
1612 return addr;
1613
1614 if (addr > TASK_SIZE - len)
1615 return -ENOMEM;
1616 if (addr & ~PAGE_MASK)
1617 return -EINVAL;
1618
1619 addr = arch_rebalance_pgtables(addr, len);
1620 error = security_mmap_addr(addr);
1621 return error ? error : addr;
1622}
1623
1624EXPORT_SYMBOL(get_unmapped_area);
1625
1626
1627struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1628{
1629 struct vm_area_struct *vma = NULL;
1630
1631 if (WARN_ON_ONCE(!mm))
1632 return NULL;
1633
1634
1635
1636 vma = mm->mmap_cache;
1637 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1638 struct rb_node *rb_node;
1639
1640 rb_node = mm->mm_rb.rb_node;
1641 vma = NULL;
1642
1643 while (rb_node) {
1644 struct vm_area_struct *vma_tmp;
1645
1646 vma_tmp = rb_entry(rb_node,
1647 struct vm_area_struct, vm_rb);
1648
1649 if (vma_tmp->vm_end > addr) {
1650 vma = vma_tmp;
1651 if (vma_tmp->vm_start <= addr)
1652 break;
1653 rb_node = rb_node->rb_left;
1654 } else
1655 rb_node = rb_node->rb_right;
1656 }
1657 if (vma)
1658 mm->mmap_cache = vma;
1659 }
1660 return vma;
1661}
1662
1663EXPORT_SYMBOL(find_vma);
1664
1665
1666
1667
1668struct vm_area_struct *
1669find_vma_prev(struct mm_struct *mm, unsigned long addr,
1670 struct vm_area_struct **pprev)
1671{
1672 struct vm_area_struct *vma;
1673
1674 vma = find_vma(mm, addr);
1675 if (vma) {
1676 *pprev = vma->vm_prev;
1677 } else {
1678 struct rb_node *rb_node = mm->mm_rb.rb_node;
1679 *pprev = NULL;
1680 while (rb_node) {
1681 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1682 rb_node = rb_node->rb_right;
1683 }
1684 }
1685 return vma;
1686}
1687
1688
1689
1690
1691
1692
1693static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1694{
1695 struct mm_struct *mm = vma->vm_mm;
1696 struct rlimit *rlim = current->signal->rlim;
1697 unsigned long new_start;
1698
1699
1700 if (!may_expand_vm(mm, grow))
1701 return -ENOMEM;
1702
1703
1704 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
1705 return -ENOMEM;
1706
1707
1708 if (vma->vm_flags & VM_LOCKED) {
1709 unsigned long locked;
1710 unsigned long limit;
1711 locked = mm->locked_vm + grow;
1712 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
1713 limit >>= PAGE_SHIFT;
1714 if (locked > limit && !capable(CAP_IPC_LOCK))
1715 return -ENOMEM;
1716 }
1717
1718
1719 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1720 vma->vm_end - size;
1721 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1722 return -EFAULT;
1723
1724
1725
1726
1727
1728 if (security_vm_enough_memory_mm(mm, grow))
1729 return -ENOMEM;
1730
1731
1732 if (vma->vm_flags & VM_LOCKED)
1733 mm->locked_vm += grow;
1734 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1735 return 0;
1736}
1737
1738#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
1739
1740
1741
1742
1743int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1744{
1745 int error;
1746
1747 if (!(vma->vm_flags & VM_GROWSUP))
1748 return -EFAULT;
1749
1750
1751
1752
1753
1754 if (unlikely(anon_vma_prepare(vma)))
1755 return -ENOMEM;
1756 vma_lock_anon_vma(vma);
1757
1758
1759
1760
1761
1762
1763
1764 if (address < PAGE_ALIGN(address+4))
1765 address = PAGE_ALIGN(address+4);
1766 else {
1767 vma_unlock_anon_vma(vma);
1768 return -ENOMEM;
1769 }
1770 error = 0;
1771
1772
1773 if (address > vma->vm_end) {
1774 unsigned long size, grow;
1775
1776 size = address - vma->vm_start;
1777 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1778
1779 error = -ENOMEM;
1780 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
1781 error = acct_stack_growth(vma, size, grow);
1782 if (!error) {
1783 anon_vma_interval_tree_pre_update_vma(vma);
1784 vma->vm_end = address;
1785 anon_vma_interval_tree_post_update_vma(vma);
1786 perf_event_mmap(vma);
1787 }
1788 }
1789 }
1790 vma_unlock_anon_vma(vma);
1791 khugepaged_enter_vma_merge(vma);
1792 validate_mm(vma->vm_mm);
1793 return error;
1794}
1795#endif
1796
1797
1798
1799
1800int expand_downwards(struct vm_area_struct *vma,
1801 unsigned long address)
1802{
1803 int error;
1804
1805
1806
1807
1808
1809 if (unlikely(anon_vma_prepare(vma)))
1810 return -ENOMEM;
1811
1812 address &= PAGE_MASK;
1813 error = security_mmap_addr(address);
1814 if (error)
1815 return error;
1816
1817 vma_lock_anon_vma(vma);
1818
1819
1820
1821
1822
1823
1824
1825
1826 if (address < vma->vm_start) {
1827 unsigned long size, grow;
1828
1829 size = vma->vm_end - address;
1830 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1831
1832 error = -ENOMEM;
1833 if (grow <= vma->vm_pgoff) {
1834 error = acct_stack_growth(vma, size, grow);
1835 if (!error) {
1836 anon_vma_interval_tree_pre_update_vma(vma);
1837 vma->vm_start = address;
1838 vma->vm_pgoff -= grow;
1839 anon_vma_interval_tree_post_update_vma(vma);
1840 perf_event_mmap(vma);
1841 }
1842 }
1843 }
1844 vma_unlock_anon_vma(vma);
1845 khugepaged_enter_vma_merge(vma);
1846 validate_mm(vma->vm_mm);
1847 return error;
1848}
1849
1850#ifdef CONFIG_STACK_GROWSUP
/* CONFIG_STACK_GROWSUP variant: stack expansion means growing upwards. */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_upwards(vma, address);
}
1855
1856struct vm_area_struct *
1857find_extend_vma(struct mm_struct *mm, unsigned long addr)
1858{
1859 struct vm_area_struct *vma, *prev;
1860
1861 addr &= PAGE_MASK;
1862 vma = find_vma_prev(mm, addr, &prev);
1863 if (vma && (vma->vm_start <= addr))
1864 return vma;
1865 if (!prev || expand_stack(prev, addr))
1866 return NULL;
1867 if (prev->vm_flags & VM_LOCKED) {
1868 mlock_vma_pages_range(prev, addr, prev->vm_end);
1869 }
1870 return prev;
1871}
1872#else
/* Default variant: stack expansion means growing downwards. */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}
1877
1878struct vm_area_struct *
1879find_extend_vma(struct mm_struct * mm, unsigned long addr)
1880{
1881 struct vm_area_struct * vma;
1882 unsigned long start;
1883
1884 addr &= PAGE_MASK;
1885 vma = find_vma(mm,addr);
1886 if (!vma)
1887 return NULL;
1888 if (vma->vm_start <= addr)
1889 return vma;
1890 if (!(vma->vm_flags & VM_GROWSDOWN))
1891 return NULL;
1892 start = vma->vm_start;
1893 if (expand_stack(vma, addr))
1894 return NULL;
1895 if (vma->vm_flags & VM_LOCKED) {
1896 mlock_vma_pages_range(vma, addr, start);
1897 }
1898 return vma;
1899}
1900#endif
1901
1902
1903
1904
1905
1906
1907
1908static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1909{
1910 unsigned long nr_accounted = 0;
1911
1912
1913 update_hiwater_vm(mm);
1914 do {
1915 long nrpages = vma_pages(vma);
1916
1917 if (vma->vm_flags & VM_ACCOUNT)
1918 nr_accounted += nrpages;
1919 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1920 vma = remove_vma(vma);
1921 } while (vma);
1922 vm_unacct_memory(nr_accounted);
1923 validate_mm(mm);
1924}
1925
1926
1927
1928
1929
1930
1931static void unmap_region(struct mm_struct *mm,
1932 struct vm_area_struct *vma, struct vm_area_struct *prev,
1933 unsigned long start, unsigned long end)
1934{
1935 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1936 struct mmu_gather tlb;
1937
1938 lru_add_drain();
1939 tlb_gather_mmu(&tlb, mm, 0);
1940 update_hiwater_rss(mm);
1941 unmap_vmas(&tlb, vma, start, end);
1942 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
1943 next ? next->vm_start : 0);
1944 tlb_finish_mmu(&tlb, start, end);
1945}
1946
1947
1948
1949
1950
1951static void
1952detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1953 struct vm_area_struct *prev, unsigned long end)
1954{
1955 struct vm_area_struct **insertion_point;
1956 struct vm_area_struct *tail_vma = NULL;
1957 unsigned long addr;
1958
1959 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1960 vma->vm_prev = NULL;
1961 do {
1962 rb_erase(&vma->vm_rb, &mm->mm_rb);
1963 mm->map_count--;
1964 tail_vma = vma;
1965 vma = vma->vm_next;
1966 } while (vma && vma->vm_start < end);
1967 *insertion_point = vma;
1968 if (vma)
1969 vma->vm_prev = prev;
1970 tail_vma->vm_next = NULL;
1971 if (mm->unmap_area == arch_unmap_area)
1972 addr = prev ? prev->vm_end : mm->mmap_base;
1973 else
1974 addr = vma ? vma->vm_start : mm->mmap_base;
1975 mm->unmap_area(mm, addr);
1976 mm->mmap_cache = NULL;
1977}
1978
1979
1980
1981
1982
1983static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1984 unsigned long addr, int new_below)
1985{
1986 struct mempolicy *pol;
1987 struct vm_area_struct *new;
1988 int err = -ENOMEM;
1989
1990 if (is_vm_hugetlb_page(vma) && (addr &
1991 ~(huge_page_mask(hstate_vma(vma)))))
1992 return -EINVAL;
1993
1994 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1995 if (!new)
1996 goto out_err;
1997
1998
1999 *new = *vma;
2000
2001 INIT_LIST_HEAD(&new->anon_vma_chain);
2002
2003 if (new_below)
2004 new->vm_end = addr;
2005 else {
2006 new->vm_start = addr;
2007 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2008 }
2009
2010 pol = mpol_dup(vma_policy(vma));
2011 if (IS_ERR(pol)) {
2012 err = PTR_ERR(pol);
2013 goto out_free_vma;
2014 }
2015 vma_set_policy(new, pol);
2016
2017 if (anon_vma_clone(new, vma))
2018 goto out_free_mpol;
2019
2020 if (new->vm_file)
2021 get_file(new->vm_file);
2022
2023 if (new->vm_ops && new->vm_ops->open)
2024 new->vm_ops->open(new);
2025
2026 if (new_below)
2027 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2028 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2029 else
2030 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2031
2032
2033 if (!err)
2034 return 0;
2035
2036
2037 if (new->vm_ops && new->vm_ops->close)
2038 new->vm_ops->close(new);
2039 if (new->vm_file)
2040 fput(new->vm_file);
2041 unlink_anon_vmas(new);
2042 out_free_mpol:
2043 mpol_put(pol);
2044 out_free_vma:
2045 kmem_cache_free(vm_area_cachep, new);
2046 out_err:
2047 return err;
2048}
2049
2050
2051
2052
2053
2054int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2055 unsigned long addr, int new_below)
2056{
2057 if (mm->map_count >= sysctl_max_map_count)
2058 return -ENOMEM;
2059
2060 return __split_vma(mm, vma, addr, new_below);
2061}
2062
2063
2064
2065
2066
2067
2068int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2069{
2070 unsigned long end;
2071 struct vm_area_struct *vma, *prev, *last;
2072
2073 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2074 return -EINVAL;
2075
2076 if ((len = PAGE_ALIGN(len)) == 0)
2077 return -EINVAL;
2078
2079
2080 vma = find_vma(mm, start);
2081 if (!vma)
2082 return 0;
2083 prev = vma->vm_prev;
2084
2085
2086
2087 end = start + len;
2088 if (vma->vm_start >= end)
2089 return 0;
2090
2091
2092
2093
2094
2095
2096
2097
2098 if (start > vma->vm_start) {
2099 int error;
2100
2101
2102
2103
2104
2105
2106 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2107 return -ENOMEM;
2108
2109 error = __split_vma(mm, vma, start, 0);
2110 if (error)
2111 return error;
2112 prev = vma;
2113 }
2114
2115
2116 last = find_vma(mm, end);
2117 if (last && end > last->vm_start) {
2118 int error = __split_vma(mm, last, end, 1);
2119 if (error)
2120 return error;
2121 }
2122 vma = prev? prev->vm_next: mm->mmap;
2123
2124
2125
2126
2127 if (mm->locked_vm) {
2128 struct vm_area_struct *tmp = vma;
2129 while (tmp && tmp->vm_start < end) {
2130 if (tmp->vm_flags & VM_LOCKED) {
2131 mm->locked_vm -= vma_pages(tmp);
2132 munlock_vma_pages_all(tmp);
2133 }
2134 tmp = tmp->vm_next;
2135 }
2136 }
2137
2138
2139
2140
2141 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2142 unmap_region(mm, vma, prev, start, end);
2143
2144
2145 remove_vma_list(mm, vma);
2146
2147 return 0;
2148}
2149
2150int vm_munmap(unsigned long start, size_t len)
2151{
2152 int ret;
2153 struct mm_struct *mm = current->mm;
2154
2155 down_write(&mm->mmap_sem);
2156 ret = do_munmap(mm, start, len);
2157 up_write(&mm->mmap_sem);
2158 return ret;
2159}
2160EXPORT_SYMBOL(vm_munmap);
2161
2162SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2163{
2164 profile_munmap(addr);
2165 return vm_munmap(addr, len);
2166}
2167
/*
 * Debug check (CONFIG_DEBUG_VM only): warn if mmap_sem can be taken for
 * reading, which means the caller does not hold it for writing.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	/* The read lock was acquired: complain and release it again. */
	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
2177
2178
2179
2180
2181
2182
2183static unsigned long do_brk(unsigned long addr, unsigned long len)
2184{
2185 struct mm_struct * mm = current->mm;
2186 struct vm_area_struct * vma, * prev;
2187 unsigned long flags;
2188 struct rb_node ** rb_link, * rb_parent;
2189 pgoff_t pgoff = addr >> PAGE_SHIFT;
2190 int error;
2191
2192 len = PAGE_ALIGN(len);
2193 if (!len)
2194 return addr;
2195
2196 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2197
2198 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2199 if (error & ~PAGE_MASK)
2200 return error;
2201
2202
2203
2204
2205 if (mm->def_flags & VM_LOCKED) {
2206 unsigned long locked, lock_limit;
2207 locked = len >> PAGE_SHIFT;
2208 locked += mm->locked_vm;
2209 lock_limit = rlimit(RLIMIT_MEMLOCK);
2210 lock_limit >>= PAGE_SHIFT;
2211 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2212 return -EAGAIN;
2213 }
2214
2215
2216
2217
2218
2219 verify_mm_writelocked(mm);
2220
2221
2222
2223
2224 munmap_back:
2225 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2226 if (do_munmap(mm, addr, len))
2227 return -ENOMEM;
2228 goto munmap_back;
2229 }
2230
2231
2232 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2233 return -ENOMEM;
2234
2235 if (mm->map_count > sysctl_max_map_count)
2236 return -ENOMEM;
2237
2238 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2239 return -ENOMEM;
2240
2241
2242 vma = vma_merge(mm, prev, addr, addr + len, flags,
2243 NULL, NULL, pgoff, NULL);
2244 if (vma)
2245 goto out;
2246
2247
2248
2249
2250 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2251 if (!vma) {
2252 vm_unacct_memory(len >> PAGE_SHIFT);
2253 return -ENOMEM;
2254 }
2255
2256 INIT_LIST_HEAD(&vma->anon_vma_chain);
2257 vma->vm_mm = mm;
2258 vma->vm_start = addr;
2259 vma->vm_end = addr + len;
2260 vma->vm_pgoff = pgoff;
2261 vma->vm_flags = flags;
2262 vma->vm_page_prot = vm_get_page_prot(flags);
2263 vma_link(mm, vma, prev, rb_link, rb_parent);
2264out:
2265 perf_event_mmap(vma);
2266 mm->total_vm += len >> PAGE_SHIFT;
2267 if (flags & VM_LOCKED) {
2268 if (!mlock_vma_pages_range(vma, addr, addr + len))
2269 mm->locked_vm += (len >> PAGE_SHIFT);
2270 }
2271 return addr;
2272}
2273
2274unsigned long vm_brk(unsigned long addr, unsigned long len)
2275{
2276 struct mm_struct *mm = current->mm;
2277 unsigned long ret;
2278
2279 down_write(&mm->mmap_sem);
2280 ret = do_brk(addr, len);
2281 up_write(&mm->mmap_sem);
2282 return ret;
2283}
2284EXPORT_SYMBOL(vm_brk);
2285
2286
2287void exit_mmap(struct mm_struct *mm)
2288{
2289 struct mmu_gather tlb;
2290 struct vm_area_struct *vma;
2291 unsigned long nr_accounted = 0;
2292
2293
2294 mmu_notifier_release(mm);
2295
2296 if (mm->locked_vm) {
2297 vma = mm->mmap;
2298 while (vma) {
2299 if (vma->vm_flags & VM_LOCKED)
2300 munlock_vma_pages_all(vma);
2301 vma = vma->vm_next;
2302 }
2303 }
2304
2305 arch_exit_mmap(mm);
2306
2307 vma = mm->mmap;
2308 if (!vma)
2309 return;
2310
2311 lru_add_drain();
2312 flush_cache_mm(mm);
2313 tlb_gather_mmu(&tlb, mm, 1);
2314
2315
2316 unmap_vmas(&tlb, vma, 0, -1);
2317
2318 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
2319 tlb_finish_mmu(&tlb, 0, -1);
2320
2321
2322
2323
2324
2325 while (vma) {
2326 if (vma->vm_flags & VM_ACCOUNT)
2327 nr_accounted += vma_pages(vma);
2328 vma = remove_vma(vma);
2329 }
2330 vm_unacct_memory(nr_accounted);
2331
2332 WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2333}
2334
2335
2336
2337
2338
2339int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
2340{
2341 struct vm_area_struct *prev;
2342 struct rb_node **rb_link, *rb_parent;
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356 if (!vma->vm_file) {
2357 BUG_ON(vma->anon_vma);
2358 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2359 }
2360 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
2361 &prev, &rb_link, &rb_parent))
2362 return -ENOMEM;
2363 if ((vma->vm_flags & VM_ACCOUNT) &&
2364 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2365 return -ENOMEM;
2366
2367 vma_link(mm, vma, prev, rb_link, rb_parent);
2368 return 0;
2369}
2370
2371
2372
2373
2374
2375struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2376 unsigned long addr, unsigned long len, pgoff_t pgoff,
2377 bool *need_rmap_locks)
2378{
2379 struct vm_area_struct *vma = *vmap;
2380 unsigned long vma_start = vma->vm_start;
2381 struct mm_struct *mm = vma->vm_mm;
2382 struct vm_area_struct *new_vma, *prev;
2383 struct rb_node **rb_link, *rb_parent;
2384 struct mempolicy *pol;
2385 bool faulted_in_anon_vma = true;
2386
2387
2388
2389
2390
2391 if (unlikely(!vma->vm_file && !vma->anon_vma)) {
2392 pgoff = addr >> PAGE_SHIFT;
2393 faulted_in_anon_vma = false;
2394 }
2395
2396 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
2397 return NULL;
2398 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2399 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2400 if (new_vma) {
2401
2402
2403
2404 if (unlikely(vma_start >= new_vma->vm_start &&
2405 vma_start < new_vma->vm_end)) {
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418 VM_BUG_ON(faulted_in_anon_vma);
2419 *vmap = vma = new_vma;
2420 }
2421 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
2422 } else {
2423 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2424 if (new_vma) {
2425 *new_vma = *vma;
2426 new_vma->vm_start = addr;
2427 new_vma->vm_end = addr + len;
2428 new_vma->vm_pgoff = pgoff;
2429 pol = mpol_dup(vma_policy(vma));
2430 if (IS_ERR(pol))
2431 goto out_free_vma;
2432 vma_set_policy(new_vma, pol);
2433 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
2434 if (anon_vma_clone(new_vma, vma))
2435 goto out_free_mempol;
2436 if (new_vma->vm_file)
2437 get_file(new_vma->vm_file);
2438 if (new_vma->vm_ops && new_vma->vm_ops->open)
2439 new_vma->vm_ops->open(new_vma);
2440 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2441 *need_rmap_locks = false;
2442 }
2443 }
2444 return new_vma;
2445
2446 out_free_mempol:
2447 mpol_put(pol);
2448 out_free_vma:
2449 kmem_cache_free(vm_area_cachep, new_vma);
2450 return NULL;
2451}
2452
2453
2454
2455
2456
2457int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2458{
2459 unsigned long cur = mm->total_vm;
2460 unsigned long lim;
2461
2462 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2463
2464 if (cur + npages > lim)
2465 return 0;
2466 return 1;
2467}
2468
2469
2470static int special_mapping_fault(struct vm_area_struct *vma,
2471 struct vm_fault *vmf)
2472{
2473 pgoff_t pgoff;
2474 struct page **pages;
2475
2476
2477
2478
2479
2480
2481
2482 pgoff = vmf->pgoff - vma->vm_pgoff;
2483
2484 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2485 pgoff--;
2486
2487 if (*pages) {
2488 struct page *page = *pages;
2489 get_page(page);
2490 vmf->page = page;
2491 return 0;
2492 }
2493
2494 return VM_FAULT_SIGBUS;
2495}
2496
2497
2498
2499
/* Close handler for special mappings; intentionally does nothing. */
static void special_mapping_close(struct vm_area_struct *vma)
{
	/* nothing to release */
}
2503
2504static const struct vm_operations_struct special_mapping_vmops = {
2505 .close = special_mapping_close,
2506 .fault = special_mapping_fault,
2507};
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518int install_special_mapping(struct mm_struct *mm,
2519 unsigned long addr, unsigned long len,
2520 unsigned long vm_flags, struct page **pages)
2521{
2522 int ret;
2523 struct vm_area_struct *vma;
2524
2525 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2526 if (unlikely(vma == NULL))
2527 return -ENOMEM;
2528
2529 INIT_LIST_HEAD(&vma->anon_vma_chain);
2530 vma->vm_mm = mm;
2531 vma->vm_start = addr;
2532 vma->vm_end = addr + len;
2533
2534 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2535 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2536
2537 vma->vm_ops = &special_mapping_vmops;
2538 vma->vm_private_data = pages;
2539
2540 ret = insert_vm_struct(mm, vma);
2541 if (ret)
2542 goto out;
2543
2544 mm->total_vm += len >> PAGE_SHIFT;
2545
2546 perf_event_mmap(vma);
2547
2548 return 0;
2549
2550out:
2551 kmem_cache_free(vm_area_cachep, vma);
2552 return ret;
2553}
2554
/*
 * Taken by mm_take_all_locks() and released by mm_drop_all_locks(), so
 * it is held for as long as one mm has all of its locks taken.
 */
static DEFINE_MUTEX(mm_all_locks_mutex);
2556
2557static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2558{
2559 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
2560
2561
2562
2563
2564 mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574 if (__test_and_set_bit(0, (unsigned long *)
2575 &anon_vma->root->rb_root.rb_node))
2576 BUG();
2577 }
2578}
2579
2580static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2581{
2582 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2593 BUG();
2594 mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
2595 }
2596}
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630int mm_take_all_locks(struct mm_struct *mm)
2631{
2632 struct vm_area_struct *vma;
2633 struct anon_vma_chain *avc;
2634
2635 BUG_ON(down_read_trylock(&mm->mmap_sem));
2636
2637 mutex_lock(&mm_all_locks_mutex);
2638
2639 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2640 if (signal_pending(current))
2641 goto out_unlock;
2642 if (vma->vm_file && vma->vm_file->f_mapping)
2643 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2644 }
2645
2646 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2647 if (signal_pending(current))
2648 goto out_unlock;
2649 if (vma->anon_vma)
2650 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
2651 vm_lock_anon_vma(mm, avc->anon_vma);
2652 }
2653
2654 return 0;
2655
2656out_unlock:
2657 mm_drop_all_locks(mm);
2658 return -EINTR;
2659}
2660
2661static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2662{
2663 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676 if (!__test_and_clear_bit(0, (unsigned long *)
2677 &anon_vma->root->rb_root.rb_node))
2678 BUG();
2679 anon_vma_unlock(anon_vma);
2680 }
2681}
2682
2683static void vm_unlock_mapping(struct address_space *mapping)
2684{
2685 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2686
2687
2688
2689
2690 mutex_unlock(&mapping->i_mmap_mutex);
2691 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2692 &mapping->flags))
2693 BUG();
2694 }
2695}
2696
2697
2698
2699
2700
2701void mm_drop_all_locks(struct mm_struct *mm)
2702{
2703 struct vm_area_struct *vma;
2704 struct anon_vma_chain *avc;
2705
2706 BUG_ON(down_read_trylock(&mm->mmap_sem));
2707 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2708
2709 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2710 if (vma->anon_vma)
2711 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
2712 vm_unlock_anon_vma(avc->anon_vma);
2713 if (vma->vm_file && vma->vm_file->f_mapping)
2714 vm_unlock_mapping(vma->vm_file->f_mapping);
2715 }
2716
2717 mutex_unlock(&mm_all_locks_mutex);
2718}
2719
2720
2721
2722
2723void __init mmap_init(void)
2724{
2725 int ret;
2726
2727 ret = percpu_counter_init(&vm_committed_as, 0);
2728 VM_BUG_ON(ret);
2729}
2730